Compare commits

1 commit

Author SHA1 Message Date
ale
b91d19dc0b fix memory remove dup
Signed-off-by: ale <ale@manalejandro.com>
2025-12-21 22:36:31 +01:00
Showing 19 changed files with 988 additions and 1099 deletions

API.md
View file

@@ -102,7 +102,7 @@ Content-Type: application/json
}
```
Note: When plaintext is provided, it is automatically stored in Redis for future lookups.
Note: When plaintext is provided, it is automatically indexed in Elasticsearch for future lookups.
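For illustration, a client-side call that triggers this auto-indexing could look like the following TypeScript sketch (the endpoint and response fields are as documented in this file; error handling is kept minimal):
```typescript
// Minimal sketch: POST a plaintext query to /api/search.
// If the plaintext is new, the server generates its hashes,
// indexes them, and responds with wasGenerated: true.
async function lookup(query: string) {
  const res = await fetch('http://localhost:3000/api/search', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ query }),
  });
  if (!res.ok) throw new Error(`Search failed: ${res.status}`);
  return res.json(); // e.g. { found, isPlaintext, plaintext, wasGenerated, hashes }
}

lookup('password').then((result) => console.log(result.hashes?.md5));
```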
#### Error Responses
@@ -113,7 +113,7 @@ Note: When plaintext is provided, it is automatically stored in Redis for future
}
```
**500 Internal Server Error** - Server or Redis error:
**500 Internal Server Error** - Server or Elasticsearch error:
```json
{
"error": "Internal server error",
@@ -127,7 +127,7 @@ Note: When plaintext is provided, it is automatically stored in Redis for future
**Endpoint**: `GET /api/health`
**Description**: Check the health of the application and Redis connection.
**Description**: Check the health of the application and Elasticsearch connection.
#### Request
@@ -139,28 +139,31 @@ No parameters required.
```json
{
"status": "ok",
"redis": {
"version": "7.2.0",
"memory": "1.5M",
"dbSize": 1542
"elasticsearch": {
"cluster": "elasticsearch",
"status": "green"
},
"stats": {
"count": 1542,
"size": 524288
"index": {
"exists": true,
"name": "hasher",
"stats": {
"documentCount": 1542,
"indexSize": 524288
}
}
}
```
**Redis status fields**:
- `version`: Redis server version
- `memory`: Memory used by Redis
- `dbSize`: Total number of keys in database
**Elasticsearch cluster status values**:
- `green`: All primary and replica shards are active
- `yellow`: All primary shards are active, but not all replicas
- `red`: Some primary shards are not active
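As a usage note, a monitoring client might treat these values as follows; this is a minimal TypeScript sketch assuming the `/api/health` URL and response shape documented above:
```typescript
// Minimal sketch: poll the health endpoint and flag a red cluster.
type HealthResponse = {
  status: string;
  elasticsearch?: { cluster: string; status: 'green' | 'yellow' | 'red' };
};

async function checkHealth(base = 'http://localhost:3000'): Promise<HealthResponse> {
  const res = await fetch(`${base}/api/health`);
  const health: HealthResponse = await res.json();
  if (res.status === 503 || health.elasticsearch?.status === 'red') {
    throw new Error(`Unhealthy: ${health.elasticsearch?.status ?? 'no response'}`);
  }
  return health; // yellow is degraded but still serving reads
}
```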
**Error** (503 Service Unavailable):
```json
{
"status": "error",
"error": "Connection refused to Redis"
"error": "Connection refused to Elasticsearch"
}
```
@@ -249,7 +252,7 @@ The API accepts requests from any origin by default. For production deployment,
## Notes
- All timestamps are in ISO 8601 format
- The API automatically creates Redis keys with proper structure
- Plaintext searches are automatically stored for future lookups
- The API automatically creates the Elasticsearch index if it doesn't exist
- Plaintext searches are automatically indexed for future lookups
- Searches are case-insensitive
- Hashes must be valid hexadecimal strings

View file

@@ -5,37 +5,6 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [2.0.0] - 2025-12-03
### Changed
#### Major Backend Migration
- **Breaking Change**: Migrated from Elasticsearch to Redis for improved performance
- Replaced Elasticsearch Client with ioredis for Redis operations
- Redesigned data structure using Redis key patterns
- Implemented O(1) hash lookups using Redis indexes
- Significantly reduced search latency (< 10ms typical)
#### New Redis Architecture
- Document storage: `hash:plaintext:{plaintext}` keys
- Hash indexes: `hash:index:{algorithm}:{hash}` for fast lookups
- Statistics tracking: `hash:stats` Redis Hash
- Pipeline operations for atomic batch writes
- Connection pooling with automatic retry strategy
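The O(1) lookup chains two direct key reads, hash index first and document second; a condensed TypeScript sketch of the pattern (essentially what the `lib/redis.ts` removed later in this diff implements):
```typescript
import Redis from 'ioredis';

const redis = new Redis({ host: 'localhost', port: 6379 });

// The reverse index gives hash -> plaintext; the document key gives
// plaintext -> full record. Two GETs, both O(1) regardless of size.
async function findByHash(algorithm: string, hash: string) {
  const plaintext = await redis.get(`hash:index:${algorithm}:${hash}`);
  if (!plaintext) return null;
  const doc = await redis.get(`hash:plaintext:${plaintext}`);
  return doc ? JSON.parse(doc) : null;
}
```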
### Updated
#### Configuration
- Environment variables changed from `ELASTICSEARCH_NODE` to `REDIS_HOST`, `REDIS_PORT`, `REDIS_PASSWORD`, `REDIS_DB`
- Simplified connection setup with sensible defaults
- Optional Redis authentication support
#### Performance Improvements
- Search latency reduced to < 10ms (from ~50ms)
- Bulk indexing maintained at 1000-5000 docs/sec
- Lower memory footprint
- Better concurrent request handling (100+ users)
## [1.0.0] - 2025-12-03
### Added
@@ -48,12 +17,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Copy to clipboard functionality for all hash values
#### Backend
- Redis integration with ioredis
- Key-value storage with hash indexes
- Automatic key structure initialization
- Auto-storage of searched plaintext for future lookups
- Elasticsearch integration with configurable endpoint
- Custom index mapping with 10 shards for horizontal scaling
- Automatic index creation on first use
- Auto-indexing of searched plaintext for future lookups
- RESTful API endpoints for search and health checks
- Case-insensitive searches
- Lowercase analyzer for case-insensitive searches
#### Frontend
- Modern, responsive UI with gradient design
@@ -93,7 +62,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
#### Dependencies
- Next.js 16.0.7
- React 19.2.0
- ioredis 5.4.2
- Elasticsearch Client 8.x
- Lucide React (icons)
- Tailwind CSS 4.x
- TypeScript 5.x
@@ -106,35 +75,28 @@ hasher/
│ ├── layout.tsx # Root layout
│ └── page.tsx # Main page
├── lib/ # Utility libraries
│ ├── redis.ts # Redis client
│ ├── elasticsearch.ts # ES client
│ └── hash.ts # Hash utilities
├── scripts/ # CLI scripts
│ ├── index-file.ts # Bulk indexer
│ └── remove-duplicates.ts # Duplicate removal
│ └── index-file.ts # Bulk indexer
└── docs/ # Documentation
```
#### Redis Data Structure
- Main documents: `hash:plaintext:{plaintext}`
- MD5 index: `hash:index:md5:{hash}`
- SHA1 index: `hash:index:sha1:{hash}`
- SHA256 index: `hash:index:sha256:{hash}`
- SHA512 index: `hash:index:sha512:{hash}`
- Statistics: `hash:stats` (Redis Hash with count and size)
#### Elasticsearch Index Schema
- Index name: `hasher`
- Shards: 10
- Replicas: 1
- Fields: plaintext, md5, sha1, sha256, sha512, created_at
### Configuration
#### Environment Variables
- `REDIS_HOST`: Redis host (default: localhost)
- `REDIS_PORT`: Redis port (default: 6379)
- `REDIS_PASSWORD`: Redis password (optional)
- `REDIS_DB`: Redis database number (default: 0)
- `ELASTICSEARCH_NODE`: Elasticsearch endpoint (default: http://localhost:9200)
#### Performance
- Bulk indexing: 1000-5000 docs/sec
- Search latency: < 10ms typical (O(1) lookups)
- Horizontal scaling ready with Redis Cluster
- Lower memory footprint than Elasticsearch
- Search latency: < 50ms typical
- Horizontal scaling ready
### Security
- Input validation on all endpoints

View file

@@ -16,7 +16,7 @@ Thank you for considering contributing to Hasher! This document provides guideli
## 🎯 Areas for Contribution
### Features
- Additional hash algorithms (argon2, etc.)
- Additional hash algorithms (bcrypt validation, argon2, etc.)
- Export functionality (CSV, JSON)
- Search history
- Batch hash lookup
@@ -48,7 +48,7 @@ Thank you for considering contributing to Hasher! This document provides guideli
Before submitting a PR:
1. Test the web interface thoroughly
2. Test the bulk indexing script
3. Verify Redis integration
3. Verify Elasticsearch integration
4. Check for TypeScript errors: `npm run build`
5. Run linter: `npm run lint`

View file

@@ -5,7 +5,7 @@ This guide covers deploying the Hasher application to production.
## Prerequisites
- Node.js 18.x or higher
- Redis 6.x or higher
- Elasticsearch 8.x cluster
- Domain name (optional, for custom domain)
- SSL certificate (recommended for production)
@@ -34,16 +34,12 @@ Vercel provides seamless deployment for Next.js applications.
4. **Set Environment Variables**:
- Go to your project settings on Vercel
- Add environment variables:
- `REDIS_HOST=your-redis-host`
- `REDIS_PORT=6379`
- `REDIS_PASSWORD=your-password` (if using authentication)
- `REDIS_DB=0`
- Add environment variable: `ELASTICSEARCH_NODE=http://your-elasticsearch-host:9200`
- Redeploy: `vercel --prod`
#### Important Notes:
- Ensure Redis is accessible from Vercel's servers
- Consider using Redis Cloud (Upstash) or a publicly accessible Redis instance
- Ensure Elasticsearch is accessible from Vercel's servers
- Consider using Elastic Cloud or a publicly accessible Elasticsearch instance
- Use environment variables for sensitive configuration
---
@@ -120,8 +116,7 @@ docker build -t hasher:latest .
# Run the container
docker run -d \
-p 3000:3000 \
-e REDIS_HOST=redis \
-e REDIS_PORT=6379 \
-e ELASTICSEARCH_NODE=http://elasticsearch:9200 \
--name hasher \
hasher:latest
```
@@ -139,23 +134,25 @@ services:
ports:
- "3000:3000"
environment:
- REDIS_HOST=redis
- REDIS_PORT=6379
- ELASTICSEARCH_NODE=http://elasticsearch:9200
depends_on:
- redis
- elasticsearch
restart: unless-stopped
redis:
image: redis:7-alpine
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:8.11.0
environment:
- discovery.type=single-node
- xpack.security.enabled=false
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
ports:
- "6379:6379"
- "9200:9200"
volumes:
- redis-data:/data
- elasticsearch-data:/usr/share/elasticsearch/data
restart: unless-stopped
command: redis-server --appendonly yes
volumes:
redis-data:
elasticsearch-data:
```
Run with:
@@ -196,10 +193,7 @@ npm run build
```bash
cat > .env.local << EOF
REDIS_HOST=localhost
REDIS_PORT=6379
REDIS_PASSWORD=your-password
REDIS_DB=0
ELASTICSEARCH_NODE=http://localhost:9200
NODE_ENV=production
EOF
```
@@ -239,43 +233,43 @@ sudo systemctl reload nginx
---
## Redis Setup
## Elasticsearch Setup
### Option 1: Redis Cloud (Managed)
### Option 1: Elastic Cloud (Managed)
1. Sign up at [Redis Cloud](https://redis.com/try-free/) or [Upstash](https://upstash.com/)
2. Create a database
3. Note the connection details (host, port, password)
4. Update `REDIS_HOST`, `REDIS_PORT`, and `REDIS_PASSWORD` environment variables
1. Sign up at [Elastic Cloud](https://cloud.elastic.co/)
2. Create a deployment
3. Note the endpoint URL
4. Update `ELASTICSEARCH_NODE` environment variable
### Option 2: Self-Hosted
```bash
# Ubuntu/Debian
wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add -
sudo sh -c 'echo "deb https://artifacts.elastic.co/packages/8.x/apt stable main" > /etc/apt/sources.list.d/elastic-8.x.list'
sudo apt-get update
sudo apt-get install redis-server
sudo apt-get install elasticsearch
# Configure
sudo nano /etc/redis/redis.conf
# Set: bind 0.0.0.0 (to allow remote connections)
# Set: requirepass your-strong-password (for security)
sudo nano /etc/elasticsearch/elasticsearch.yml
# Set: network.host: 0.0.0.0
# Start
sudo systemctl start redis-server
sudo systemctl enable redis-server
sudo systemctl start elasticsearch
sudo systemctl enable elasticsearch
```
---
## Security Considerations
### 1. Redis Security
### 1. Elasticsearch Security
- Enable authentication with requirepass
- Use TLS for Redis connections (Redis 6+)
- Enable authentication on Elasticsearch
- Use HTTPS for Elasticsearch connection
- Restrict network access with firewall rules
- Update credentials regularly
- Disable dangerous commands (FLUSHDB, FLUSHALL, etc.)
### 2. Application Security
@@ -291,7 +285,7 @@ sudo systemctl enable redis-server
# Example UFW firewall rules
sudo ufw allow 80/tcp
sudo ufw allow 443/tcp
sudo ufw allow from YOUR_IP to any port 6379 # Redis
sudo ufw allow from YOUR_IP to any port 9200 # Elasticsearch
sudo ufw enable
```
@@ -309,48 +303,37 @@ pm2 monit
pm2 logs hasher
```
### Redis Monitoring
### Elasticsearch Monitoring
```bash
# Health check
redis-cli ping
curl http://localhost:9200/_cluster/health?pretty
# Get info
redis-cli INFO
# Database stats
redis-cli INFO stats
# Memory usage
redis-cli INFO memory
# Index stats
curl http://localhost:9200/hasher/_stats?pretty
```
---
## Backup and Recovery
### Redis Backups
### Elasticsearch Snapshots
```bash
# Enable AOF (Append Only File) persistence
redis-cli CONFIG SET appendonly yes
# Configure snapshot repository
curl -X PUT "localhost:9200/_snapshot/hasher_backup" -H 'Content-Type: application/json' -d'
{
"type": "fs",
"settings": {
"location": "/mnt/backups/elasticsearch"
}
}'
# Save RDB snapshot manually
redis-cli SAVE
# Create snapshot
curl -X PUT "localhost:9200/_snapshot/hasher_backup/snapshot_1?wait_for_completion=true"
# Configure automatic backups in redis.conf
save 900 1 # Save if 1 key changed in 15 minutes
save 300 10 # Save if 10 keys changed in 5 minutes
save 60 10000 # Save if 10000 keys changed in 1 minute
# Backup files location (default)
# RDB: /var/lib/redis/dump.rdb
# AOF: /var/lib/redis/appendonly.aof
# Restore from backup
sudo systemctl stop redis-server
sudo cp /backup/dump.rdb /var/lib/redis/
sudo systemctl start redis-server
# Restore snapshot
curl -X POST "localhost:9200/_snapshot/hasher_backup/snapshot_1/_restore"
```
---
@@ -361,14 +344,13 @@ sudo systemctl start redis-server
1. Deploy multiple Next.js instances
2. Use a load balancer (nginx, HAProxy)
3. Share the same Redis instance or cluster
3. Share the same Elasticsearch cluster
### Redis Scaling
### Elasticsearch Scaling
1. Use Redis Cluster for horizontal scaling
2. Set up Redis Sentinel for high availability
3. Use read replicas for read-heavy workloads
4. Consider Redis Enterprise for advanced features
1. Add more nodes to the cluster
2. Increase shard count (already set to 10)
3. Use replicas for read scaling
---
@@ -381,31 +363,28 @@ pm2 status
pm2 logs hasher --lines 100
```
### Check Redis
### Check Elasticsearch
```bash
redis-cli ping
redis-cli DBSIZE
redis-cli INFO stats
curl http://localhost:9200/_cluster/health
curl http://localhost:9200/hasher/_count
```
### Common Issues
**Issue**: Cannot connect to Redis
**Issue**: Cannot connect to Elasticsearch
- Check firewall rules
- Verify Redis is running: `redis-cli ping`
- Check `REDIS_HOST`, `REDIS_PORT`, and `REDIS_PASSWORD` environment variables
- Verify Elasticsearch is running
- Check `ELASTICSEARCH_NODE` environment variable
**Issue**: Out of memory
- Increase Node.js memory: `NODE_OPTIONS=--max-old-space-size=4096`
- Configure Redis maxmemory and eviction policy
- Use Redis persistence (RDB/AOF) carefully
- Increase Elasticsearch heap size
**Issue**: Slow searches
- Verify O(1) lookups are being used (direct key access)
- Check Redis memory and CPU usage
- Consider using Redis Cluster for distribution
- Optimize key patterns
- Add more Elasticsearch nodes
- Optimize queries
- Increase replica count
---
@@ -413,10 +392,9 @@ redis-cli INFO stats
1. **Enable Next.js Static Optimization**
2. **Use CDN for static assets**
3. **Enable Redis pipelining for bulk operations**
4. **Configure appropriate maxmemory for Redis**
5. **Use SSD storage for Redis persistence**
6. **Enable Redis connection pooling (already implemented)**
3. **Enable Elasticsearch caching**
4. **Configure appropriate JVM heap for Elasticsearch**
5. **Use SSD storage for Elasticsearch**
---
@@ -424,6 +402,5 @@ redis-cli INFO stats
For deployment issues, check:
- [Next.js Deployment Docs](https://nextjs.org/docs/deployment)
- [Redis Setup Guide](https://redis.io/docs/getting-started/)
- [ioredis Documentation](https://github.com/redis/ioredis)
- [Elasticsearch Setup Guide](https://www.elastic.co/guide/en/elasticsearch/reference/current/setup.html)
- Project GitHub Issues

View file

@@ -2,7 +2,7 @@
## 📋 Project Overview
**Hasher** is a modern, high-performance hash search and generation tool built with Next.js and powered by Redis. It provides a beautiful web interface for searching hash values and generating cryptographic hashes from plaintext.
**Hasher** is a modern, high-performance hash search and generation tool built with Next.js and powered by Elasticsearch. It provides a beautiful web interface for searching hash values and generating cryptographic hashes from plaintext.
### Version: 1.0.0
### Status: ✅ Production Ready
@@ -25,10 +25,10 @@
- Copy-to-clipboard functionality
### 📊 Backend
- Redis integration with ioredis
- Key-value storage with hash indexes
- Elasticsearch 8.x integration
- 10-shard index for horizontal scaling
- RESTful API with JSON responses
- Automatic key structure initialization
- Automatic index creation and initialization
- Health monitoring endpoint
### 🎨 Frontend
@@ -52,7 +52,7 @@
### Stack
- **Frontend**: Next.js 16.0, React 19.2, Tailwind CSS 4.x
- **Backend**: Next.js API Routes, Node.js 18+
- **Database**: Redis 6.x+
- **Database**: Elasticsearch 8.x
- **Language**: TypeScript 5.x
- **Icons**: Lucide React
@@ -68,7 +68,7 @@ hasher/
│ └── globals.css # Global styles
├── lib/
│ ├── redis.ts # Redis client & config
│ ├── elasticsearch.ts # ES client & config
│ └── hash.ts # Hash utilities
├── scripts/
@@ -106,7 +106,7 @@ Search for hashes or generate from plaintext
- **Output**: Hash results or generated hashes
### GET /api/health
Check system health and Redis status
Check system health and Elasticsearch status
- **Output**: System status and statistics
---
@@ -139,34 +139,28 @@ npm run index-file wordlist.txt -- --batch-size 500
### Environment Configuration
```bash
# Optional: Set Redis connection details
export REDIS_HOST=localhost
export REDIS_PORT=6379
export REDIS_PASSWORD=your-password
export REDIS_DB=0
# Optional: Set Elasticsearch endpoint
export ELASTICSEARCH_NODE=http://localhost:9200
```
---
## 🗄️ Redis Data Structure
## 🗄️ Elasticsearch Configuration
### Key Patterns
- **Documents**: `hash:plaintext:{plaintext}` - Main document storage
- **MD5 Index**: `hash:index:md5:{hash}` - MD5 hash lookup
- **SHA1 Index**: `hash:index:sha1:{hash}` - SHA1 hash lookup
- **SHA256 Index**: `hash:index:sha256:{hash}` - SHA256 hash lookup
- **SHA512 Index**: `hash:index:sha512:{hash}` - SHA512 hash lookup
- **Statistics**: `hash:stats` - Redis Hash with count and size
### Index: `hasher`
- **Shards**: 10 (horizontal scaling)
- **Replicas**: 1 (redundancy)
- **Analyzer**: Custom lowercase analyzer
### Document Schema
### Schema
```json
{
"plaintext": "string",
"md5": "string",
"sha1": "string",
"sha256": "string",
"sha512": "string",
"created_at": "ISO 8601 date string"
"plaintext": "text + keyword",
"md5": "keyword",
"sha1": "keyword",
"sha256": "keyword",
"sha512": "keyword",
"created_at": "date"
}
```
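Since every hash field is a `keyword`, an exact lookup is a single term query; a minimal TypeScript sketch using the official client (it mirrors the search route shown later in this diff):
```typescript
import { Client } from '@elastic/elasticsearch';

const es = new Client({
  node: process.env.ELASTICSEARCH_NODE || 'http://localhost:9200',
});

// keyword fields support exact matching, so a hash lookup is one
// term query against the field for that algorithm.
async function findByMd5(hash: string) {
  const res = await es.search({
    index: 'hasher',
    query: { term: { md5: hash.toLowerCase() } },
  });
  return res.hits.hits[0]?._source ?? null;
}
```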
@@ -186,9 +180,9 @@ export REDIS_DB=0
## 🚀 Performance Metrics
- **Bulk Indexing**: 1000-5000 docs/sec
- **Search Latency**: <10ms (typical O(1) lookups)
- **Concurrent Users**: 100+ supported
- **Horizontal Scaling**: Ready with Redis Cluster
- **Search Latency**: <50ms (typical)
- **Concurrent Users**: 50+ supported
- **Horizontal Scaling**: Ready with 10 shards
---
@@ -226,9 +220,9 @@ export REDIS_DB=0
### Requirements
- Node.js 18.x or higher
- Redis 6.x or higher
- Elasticsearch 8.x
- 512MB RAM minimum
- Redis server running locally or remotely
- Internet connection for Elasticsearch
---
@@ -291,7 +285,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
## 🙏 Acknowledgments
- Built with [Next.js](https://nextjs.org/)
- Powered by [Redis](https://redis.io/)
- Powered by [Elasticsearch](https://www.elastic.co/)
- Icons by [Lucide](https://lucide.dev/)
- Styled with [Tailwind CSS](https://tailwindcss.com/)
@@ -319,7 +313,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
### Completed ✅
- [x] Core hash search functionality
- [x] Hash generation from plaintext
- [x] Redis integration
- [x] Elasticsearch integration
- [x] Modern responsive UI
- [x] Bulk indexing script
- [x] API endpoints

View file

@@ -45,35 +45,32 @@ GET /api/health
- **Web Interface**: http://localhost:3000
- **Search API**: http://localhost:3000/api/search
- **Health API**: http://localhost:3000/api/health
- **Redis**: localhost:6379
- **Elasticsearch**: http://localhost:9200
## 📊 Redis Commands
## 📊 Elasticsearch Commands
```bash
# Test connection
redis-cli ping
# Health
curl http://localhost:9200/_cluster/health?pretty
# Get database stats
redis-cli INFO stats
# Index stats
curl http://localhost:9200/hasher/_stats?pretty
# Count all keys
redis-cli DBSIZE
# Document count
curl http://localhost:9200/hasher/_count?pretty
# List all hash documents
redis-cli KEYS "hash:plaintext:*"
# Search
curl http://localhost:9200/hasher/_search?pretty
# Get document
redis-cli GET "hash:plaintext:password"
# Clear all data (CAUTION!)
redis-cli FLUSHDB
# Delete index (CAUTION!)
curl -X DELETE http://localhost:9200/hasher
```
## 🐛 Troubleshooting
| Problem | Solution |
|---------|----------|
| Can't connect to Redis | Check `REDIS_HOST` and `REDIS_PORT` env vars |
| Can't connect to ES | Check `ELASTICSEARCH_NODE` env var |
| Port 3000 in use | Use `PORT=3001 npm run dev` |
| Module not found | Run `npm install` |
| Build errors | Run `npm run build` to see details |
@@ -84,14 +81,17 @@ redis-cli FLUSHDB
|------|---------|
| `app/page.tsx` | Main UI component |
| `app/api/search/route.ts` | Search endpoint |
| `lib/redis.ts` | Redis configuration |
| `lib/elasticsearch.ts` | ES configuration |
| `lib/hash.ts` | Hash utilities |
| `scripts/index-file.ts` | Bulk indexer |
## ⚙️ Environment Variables
```bash
# Required
ELASTICSEARCH_NODE=http://localhost:9200
# Optional
REDIS_HOST=localhost
REDIS_PORT=6379
REDIS_PASSWORD=your-password
REDIS_DB=0
NODE_ENV=production
```

README.md
View file

@@ -1,9 +1,9 @@
# Hasher 🔐
A modern, high-performance hash search and generation tool powered by Redis and Next.js. Search for hash values to find their plaintext origins or generate hashes from any text input.
A modern, high-performance hash search and generation tool powered by Elasticsearch and Next.js. Search for hash values to find their plaintext origins or generate hashes from any text input.
![Hasher Banner](https://img.shields.io/badge/Next.js-16.0-black?style=for-the-badge&logo=next.js)
![Redis](https://img.shields.io/badge/Redis-7.x-DC382D?style=for-the-badge&logo=redis)
![Elasticsearch](https://img.shields.io/badge/Elasticsearch-8.x-005571?style=for-the-badge&logo=elasticsearch)
![TypeScript](https://img.shields.io/badge/TypeScript-5.x-3178C6?style=for-the-badge&logo=typescript)
## ✨ Features
@@ -11,11 +11,10 @@ A modern, high-performance hash search and generation tool powered by Redis and
- 🔍 **Hash Lookup**: Search for MD5, SHA1, SHA256, and SHA512 hashes
- 🔑 **Hash Generation**: Generate multiple hash types from plaintext
- 💾 **Auto-Indexing**: Automatically stores searched plaintext and hashes
- 📊 **Redis Backend**: Ultra-fast in-memory storage with persistence
- 🚀 **Bulk Indexing**: Import wordlists via command-line script with resume capability
- 📊 **Elasticsearch Backend**: Scalable storage with 10 shards for performance
- 🚀 **Bulk Indexing**: Import wordlists via command-line script
- 🎨 **Modern UI**: Beautiful, responsive interface with real-time feedback
- 📋 **Copy to Clipboard**: One-click copying of any hash value
- ⚡ **High Performance**: Lightning-fast searches with Redis indexing
## 🏗️ Architecture
@@ -33,9 +32,8 @@ A modern, high-performance hash search and generation tool powered by Redis and
┌─────────────┐
│    Redis    │ ← In-memory storage
│ (Key-Value  │   (localhost:6379)
│  + Hashes)  │
│Elasticsearch│ ← Distributed storage
│  10 Shards  │   (localhost:9200)
└─────────────┘
```
@@ -44,7 +42,7 @@ A modern, high-performance hash search and generation tool powered by Redis and
### Prerequisites
- Node.js 18.x or higher
- Redis 6.x or higher running on `localhost:6379`
- Elasticsearch 8.x running on `localhost:9200`
- npm or yarn
### Installation
@@ -60,33 +58,20 @@ A modern, high-performance hash search and generation tool powered by Redis and
npm install
```
3. **Start Redis** (if not already running)
3. **Configure Elasticsearch** (optional)
By default, the app connects to `http://localhost:9200`. To change this:
```bash
# Using Docker
docker run -d --name redis -p 6379:6379 redis:latest
# Or using system package manager
sudo systemctl start redis
export ELASTICSEARCH_NODE=http://your-elasticsearch-host:9200
```
4. **Configure Redis** (optional)
By default, the app connects to `localhost:6379`. To change this:
```bash
export REDIS_HOST=your-redis-host
export REDIS_PORT=6379
export REDIS_PASSWORD=your-password # if authentication is enabled
export REDIS_DB=0 # database number
```
5. **Run the development server**
4. **Run the development server**
```bash
npm run dev
```
6. **Open your browser**
5. **Open your browser**
Navigate to [http://localhost:3000](http://localhost:3000)
@@ -115,12 +100,6 @@ npm run index-file wordlist.txt
# With custom batch size
npm run index-file wordlist.txt -- --batch-size 500
# Skip duplicate checking (faster)
npm run index-file wordlist.txt -- --no-check
# Resume interrupted indexing
npm run index-file wordlist.txt -- --resume
# Show help
npm run index-file -- --help
```
@@ -135,11 +114,10 @@ qwerty
**Script features**:
- ✅ Bulk indexing with configurable batch size
- ✅ Progress indicator and real-time stats
- ✅ State persistence with resume capability
- ✅ Optional duplicate checking
- ✅ Progress indicator with percentage
- ✅ Error handling and reporting
- ✅ Performance metrics (docs/sec)
- ✅ Automatic index refresh
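Conceptually, each batch the script reads becomes one `bulk` request; a hedged TypeScript sketch of that core step (field names follow the schema in this README; duplicate checking and progress reporting are omitted):
```typescript
import { Client } from '@elastic/elasticsearch';
import crypto from 'crypto';

const es = new Client({ node: 'http://localhost:9200' });

const hash = (algo: string, text: string) =>
  crypto.createHash(algo).update(text).digest('hex');

// Index one batch of plaintexts in a single bulk request.
async function indexBatch(words: string[]) {
  const operations = words.flatMap((plaintext) => [
    { index: { _index: 'hasher' } },
    {
      plaintext,
      md5: hash('md5', plaintext),
      sha1: hash('sha1', plaintext),
      sha256: hash('sha256', plaintext),
      sha512: hash('sha512', plaintext),
      created_at: new Date().toISOString(),
    },
  ]);
  const res = await es.bulk({ operations });
  if (res.errors) console.error('Some documents failed to index');
}
```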
## 🔌 API Reference
@@ -193,17 +171,15 @@ Search for a hash or generate hashes from plaintext.
**GET** `/api/health`
Check Redis connection and index status.
Check Elasticsearch connection and index status.
**Response**:
```json
{
"status": "ok",
"redis": {
"connected": true,
"version": "7.0.15",
"usedMemory": 2097152,
"dbSize": 1542
"elasticsearch": {
"cluster": "elasticsearch",
"status": "green"
},
"index": {
"exists": true,
@@ -216,33 +192,30 @@ Check Redis connection and index status.
}
```
## 🗄️ Redis Data Structure
## 🗄️ Elasticsearch Index
### Key Structure
### Index Configuration
**Main Documents**: `hash:plaintext:{plaintext}`
- Stores complete hash document as JSON string
- Contains all hash algorithms and metadata
- **Name**: `hasher`
- **Shards**: 10 (for horizontal scaling)
- **Replicas**: 1 (for redundancy)
**Hash Indexes**: `hash:index:{algorithm}:{hash}`
- Reverse lookup from hash to plaintext
- One key per algorithm (md5, sha1, sha256, sha512)
- Value is the plaintext string
### Mapping Schema
**Statistics**: `hash:stats` (Redis Hash)
- `count`: Total number of unique plaintexts
- `size`: Approximate total size in bytes
### Document Schema
```typescript
```json
{
"plaintext": string,
"md5": string,
"sha1": string,
"sha256": string,
"sha512": string,
"created_at": string (ISO 8601)
"plaintext": {
"type": "text",
"analyzer": "lowercase_analyzer",
"fields": {
"keyword": { "type": "keyword" }
}
},
"md5": { "type": "keyword" },
"sha1": { "type": "keyword" },
"sha256": { "type": "keyword" },
"sha512": { "type": "keyword" },
"created_at": { "type": "date" }
}
```
@@ -260,11 +233,10 @@ hasher/
│ ├── page.tsx # Main UI component
│ └── globals.css # Global styles
├── lib/
│ ├── redis.ts # Redis client & data layer
│ ├── elasticsearch.ts # ES client & index config
│ └── hash.ts # Hash utilities
├── scripts/
│ ├── index-file.ts # Bulk indexing script
│ └── remove-duplicates.ts # Duplicate removal utility
│ └── index-file.ts # Bulk indexing script
├── package.json
├── tsconfig.json
├── next.config.ts
@@ -285,10 +257,7 @@ npm run start
Create a `.env.local` file:
```env
REDIS_HOST=localhost
REDIS_PORT=6379
REDIS_PASSWORD=your-password
REDIS_DB=0
ELASTICSEARCH_NODE=http://localhost:9200
```
### Linting
@@ -330,7 +299,7 @@ This project is open source and available under the [MIT License](LICENSE).
## 🙏 Acknowledgments
- Built with [Next.js](https://nextjs.org/)
- Powered by [Redis](https://redis.io/)
- Powered by [Elasticsearch](https://www.elastic.co/)
- Icons by [Lucide](https://lucide.dev/)
- Styled with [Tailwind CSS](https://tailwindcss.com/)

View file

@@ -1,222 +0,0 @@
# Redis Migration - Quick Reference
## 🚀 Quick Start
### 1. Install Redis
```bash
# Ubuntu/Debian
sudo apt-get install redis-server
# macOS
brew install redis
# Start Redis
redis-server
# or
sudo systemctl start redis-server
```
### 2. Configure Environment (Optional)
```bash
# Create .env.local
cat > .env.local << EOF
REDIS_HOST=localhost
REDIS_PORT=6379
REDIS_PASSWORD= # Leave empty if no password
REDIS_DB=0
EOF
```
### 3. Start Application
```bash
yarn dev
```
## 🔍 Testing the Migration
### Test Health Endpoint
```bash
curl http://localhost:3000/api/health
```
Expected response:
```json
{
"status": "ok",
"redis": {
"version": "7.x",
"memory": "1.5M",
"dbSize": 0
},
"stats": {
"count": 0,
"size": 0
}
}
```
### Test Search API
```bash
# Generate hashes
curl -X POST http://localhost:3000/api/search \
-H "Content-Type: application/json" \
-d '{"query":"password"}'
# Search for hash
curl -X POST http://localhost:3000/api/search \
-H "Content-Type: application/json" \
-d '{"query":"5f4dcc3b5aa765d61d8327deb882cf99"}'
```
## 📊 Redis Commands
### Check Connection
```bash
redis-cli ping
# Should return: PONG
```
### View Data
```bash
# Count all keys
redis-cli DBSIZE
# List all documents
redis-cli KEYS "hash:plaintext:*"
# Get a specific document
redis-cli GET "hash:plaintext:password"
# Get statistics
redis-cli HGETALL hash:stats
# Search by hash
redis-cli GET "hash:index:md5:5f4dcc3b5aa765d61d8327deb882cf99"
```
### Clear Data (if needed)
```bash
# WARNING: Deletes ALL data in current database
redis-cli FLUSHDB
```
## 🔄 Bulk Indexing
### Basic Usage
```bash
yarn index-file sample-wordlist.txt
```
### Advanced Options
```bash
# Custom batch size
yarn index-file wordlist.txt -- --batch-size 500
# Skip duplicate checking (faster)
yarn index-file wordlist.txt -- --no-check
# Resume from previous state
yarn index-file wordlist.txt -- --resume
# Custom state file
yarn index-file wordlist.txt -- --state-file .my-state.json
```
## 🐛 Troubleshooting
### Cannot connect to Redis
```bash
# Check if Redis is running
redis-cli ping
# Check Redis status
sudo systemctl status redis-server
# View Redis logs
sudo journalctl -u redis-server -f
```
### Application shows Redis errors
1. Verify Redis is running: `redis-cli ping`
2. Check environment variables in `.env.local`
3. Check firewall rules if Redis is on another machine
4. Verify Redis password if authentication is enabled
### Clear stale state files
```bash
rm -f .indexer-state-*.json
```
## 📈 Monitoring
### Redis Memory Usage
```bash
redis-cli INFO memory
```
### Redis Stats
```bash
redis-cli INFO stats
```
### Application Stats
```bash
curl http://localhost:3000/api/health | jq .
```
## 🔒 Security (Production)
### Enable Redis Authentication
```bash
# Edit redis.conf
sudo nano /etc/redis/redis.conf
# Add/uncomment:
requirepass your-strong-password
# Restart Redis
sudo systemctl restart redis-server
```
### Update .env.local
```env
REDIS_PASSWORD=your-strong-password
```
## 📚 Key Differences from Elasticsearch
| Feature | Elasticsearch | Redis |
|---------|--------------|-------|
| Data Model | Document-based | Key-value |
| Search Complexity | O(log n) | O(1) |
| Setup | Complex cluster | Single instance |
| Memory | Higher | Lower |
| Latency | ~50ms | <10ms |
| Scaling | Shards/Replicas | Cluster/Sentinel |
## ✅ Verification Checklist
- [ ] Redis is installed and running
- [ ] Application builds without errors (`yarn build`)
- [ ] Health endpoint returns OK status
- [ ] Can generate hashes from plaintext
- [ ] Can search for generated hashes
- [ ] Statistics display on homepage
- [ ] Bulk indexing script works
- [ ] Data persists after application restart
## 📞 Support
- Redis Documentation: https://redis.io/docs/
- ioredis Documentation: https://github.com/redis/ioredis
- Project README: [README.md](README.md)
---
**Quick Test Command:**
```bash
# One-liner to test everything
redis-cli ping && yarn build && curl -s http://localhost:3000/api/health | jq .status
```
If all commands succeed, the migration is working correctly! ✅

View file

@@ -9,7 +9,7 @@ This guide will help you quickly set up and test the Hasher application.
Ensure you have:
- ✅ Node.js 18.x or higher (`node --version`)
- ✅ npm (`npm --version`)
- ✅ Redis running on `localhost:6379`
- ✅ Elasticsearch running on `localhost:9200`
### 2. Installation
@@ -26,7 +26,7 @@ npm run dev
The application will be available at: **http://localhost:3000**
### 3. Verify Redis Connection
### 3. Verify Elasticsearch Connection
```bash
# Check health endpoint
@@ -37,15 +37,7 @@ Expected response:
```json
{
"status": "ok",
"redis": {
"version": "7.x",
"memory": "1.5M",
"dbSize": 0
},
"stats": {
"count": 0,
"size": 0
}
"elasticsearch": { ... }
}
```
@@ -94,18 +86,20 @@ npm run index-file sample-wordlist.txt
```
📚 Hasher Indexer
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Redis: localhost:6379
Elasticsearch: http://localhost:9200
Index: hasher
File: sample-wordlist.txt
Batch size: 100
Duplicate check: enabled
🔗 Connecting to Redis...
🔗 Connecting to Elasticsearch...
✅ Connected successfully
📖 Reading file...
✅ Found 20 words/phrases to process
⏳ Progress: 20/20 (100.0%) - Indexed: 20, Skipped: 0, Errors: 0
⏳ Progress: 20/20 (100.0%) - Indexed: 20, Errors: 0
🔄 Refreshing index...
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
✅ Indexing complete!
@@ -191,13 +185,13 @@ fetch('/api/search', {
- [ ] Results display correctly
### Data Persistence
- [ ] New plaintext is saved to Redis
- [ ] New plaintext is saved to Elasticsearch
- [ ] Saved hashes can be found in subsequent searches
- [ ] Bulk indexing saves all entries
- [ ] Redis keys are created with proper patterns
- [ ] Index is created automatically if missing
### Error Handling
- [ ] Redis connection errors are handled
- [ ] Elasticsearch connection errors are handled
- [ ] Empty search queries are prevented
- [ ] Invalid input is handled gracefully
- [ ] Network errors show user-friendly messages
@@ -206,16 +200,15 @@ fetch('/api/search', {
## 🐛 Common Issues & Solutions
### Issue: Cannot connect to Redis
### Issue: Cannot connect to Elasticsearch
**Solution**:
```bash
# Check if Redis is running
redis-cli ping
# Check if Elasticsearch is running
curl http://localhost:9200
# If not accessible, update the environment variables
export REDIS_HOST=localhost
export REDIS_PORT=6379
# If not accessible, update the environment variable
export ELASTICSEARCH_NODE=http://your-elasticsearch-host:9200
npm run dev
```
@@ -249,34 +242,33 @@ npm run index-file -- "$(pwd)/sample-wordlist.txt"
---
## 📊 Verify Data in Redis
## 📊 Verify Data in Elasticsearch
### Check Database Size
### Check Index Stats
```bash
redis-cli DBSIZE
curl http://localhost:9200/hasher/_stats?pretty
```
### Get Statistics
### Count Documents
```bash
redis-cli HGETALL hash:stats
curl http://localhost:9200/hasher/_count?pretty
```
### View Sample Documents
```bash
# List first 10 document keys
redis-cli --scan --pattern "hash:plaintext:*" | head -10
# Get a specific document
redis-cli GET "hash:plaintext:password"
curl "http://localhost:9200/hasher/_search?pretty&size=5"
```
### Search Specific Hash
```bash
# Find document by MD5 hash
redis-cli GET "hash:index:md5:5f4dcc3b5aa765d61d8327deb882cf99"
# Then get the full document
redis-cli GET "hash:plaintext:password"
curl http://localhost:9200/hasher/_search?pretty -H 'Content-Type: application/json' -d'
{
"query": {
"term": {
"md5": "5f4dcc3b5aa765d61d8327deb882cf99"
}
}
}'
```
---
@@ -337,7 +329,7 @@ Create `search.json`:
- [ ] CORS configuration
- [ ] Rate limiting (if implemented)
- [ ] Error message information disclosure
- [ ] Redis authentication (if enabled)
- [ ] Elasticsearch authentication (if enabled)
---
@@ -347,7 +339,7 @@ Before deploying to production:
- [ ] All tests passing
- [ ] Environment variables configured
- [ ] Redis secured and backed up (RDB/AOF)
- [ ] Elasticsearch secured and backed up
- [ ] SSL/TLS certificates installed
- [ ] Error logging configured
- [ ] Monitoring set up
@@ -365,7 +357,7 @@ Before deploying to production:
## Environment
- Node.js version:
- Redis version:
- Elasticsearch version:
- Browser(s) tested:
## Test Results

View file

@@ -1,29 +1,34 @@
import { NextResponse } from 'next/server';
import { getRedisInfo, getStats, INDEX_NAME } from '@/lib/redis';
import { esClient, INDEX_NAME } from '@/lib/elasticsearch';
export async function GET() {
try {
// Check Redis connection and get info
const redisInfo = await getRedisInfo();
// Check Elasticsearch connection
const health = await esClient.cluster.health({});
// Get index stats
const stats = await getStats();
// Check if index exists
const indexExists = await esClient.indices.exists({ index: INDEX_NAME });
// Get index stats if exists
let stats = null;
if (indexExists) {
const statsResponse = await esClient.indices.stats({ index: INDEX_NAME });
stats = {
documentCount: statsResponse._all?.primaries?.docs?.count || 0,
indexSize: statsResponse._all?.primaries?.store?.size_in_bytes || 0
};
}
return NextResponse.json({
status: 'ok',
redis: {
connected: redisInfo.connected,
version: redisInfo.version,
usedMemory: redisInfo.usedMemory,
dbSize: redisInfo.dbSize
elasticsearch: {
cluster: health.cluster_name,
status: health.status,
},
index: {
exists: true,
exists: indexExists,
name: INDEX_NAME,
stats: {
documentCount: stats.count,
indexSize: stats.size
}
stats
}
});
} catch (error) {

View file

@@ -1,52 +1,152 @@
import { NextRequest, NextResponse } from 'next/server';
import { storeHashDocument, findByPlaintext, findByHash, initializeRedis } from '@/lib/redis';
import { esClient, INDEX_NAME, initializeIndex } from '@/lib/elasticsearch';
import { generateHashes, detectHashType } from '@/lib/hash';
interface HashDocument {
plaintext: string;
md5: string;
sha1: string;
sha256: string;
sha512: string;
created_at?: string;
}
// Maximum allowed query length
const MAX_QUERY_LENGTH = 1000;
// Characters that could be used in NoSQL/Elasticsearch injection attacks
const DANGEROUS_PATTERNS = [
/[{}\[\]]/g, // JSON structure characters
/\$[a-zA-Z]/g, // MongoDB-style operators
/\\u[0-9a-fA-F]{4}/g, // Unicode escapes
/<script/gi, // XSS attempts
/javascript:/gi, // XSS attempts
];
/**
* Sanitize input to prevent NoSQL injection attacks
* For hash lookups, we only need alphanumeric characters and $
* For plaintext, we allow more characters but sanitize dangerous patterns
*/
function sanitizeInput(input: string): string {
// Trim and take first word only
let sanitized = input.trim().split(/\s+/)[0] || '';
// Limit length
if (sanitized.length > MAX_QUERY_LENGTH) {
sanitized = sanitized.substring(0, MAX_QUERY_LENGTH);
}
// Remove null bytes
sanitized = sanitized.replace(/\0/g, '');
// Check for dangerous patterns
for (const pattern of DANGEROUS_PATTERNS) {
sanitized = sanitized.replace(pattern, '');
}
return sanitized;
}
/**
* Validate that the input is safe for use in Elasticsearch queries
*/
function isValidInput(input: string): boolean {
// Check for empty input
if (!input || input.length === 0) {
return false;
}
// Check for excessively long input
if (input.length > MAX_QUERY_LENGTH) {
return false;
}
// Check for control characters (except normal whitespace)
if (/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/.test(input)) {
return false;
}
return true;
}
export async function POST(request: NextRequest) {
try {
const { query } = await request.json();
const body = await request.json();
if (!query || typeof query !== 'string') {
// Validate request body structure
if (!body || typeof body !== 'object') {
return NextResponse.json(
{ error: 'Query parameter is required' },
{ error: 'Invalid request body' },
{ status: 400 }
);
}
// Ensure Redis is connected
await initializeRedis();
const { query } = body;
const cleanQuery = query.trim().split(/\s+/)[0];
// Validate query type
if (!query || typeof query !== 'string') {
return NextResponse.json(
{ error: 'Query parameter is required and must be a string' },
{ status: 400 }
);
}
// Validate input before processing
if (!isValidInput(query)) {
return NextResponse.json(
{ error: 'Invalid query: contains forbidden characters or is too long' },
{ status: 400 }
);
}
// Sanitize input
const cleanQuery = sanitizeInput(query);
if (!cleanQuery) {
return NextResponse.json(
{ error: 'Invalid query: only whitespace provided' },
{ error: 'Invalid query: only whitespace or invalid characters provided' },
{ status: 400 }
);
}
// Ensure index exists
await initializeIndex();
const cleanQueryLower = cleanQuery.toLowerCase();
const hashType = detectHashType(cleanQueryLower);
if (hashType) {
// Query is a hash - search for it in Redis
const doc = await findByHash(hashType, cleanQueryLower);
// Query is a hash - search for it in Elasticsearch
const searchResponse = await esClient.search<HashDocument>({
index: INDEX_NAME,
query: {
term: {
[hashType]: cleanQueryLower
}
}
});
if (doc) {
const hits = searchResponse.hits.hits;
if (hits.length > 0) {
// Found matching plaintext
return NextResponse.json({
found: true,
hashType,
hash: cleanQuery,
results: [{
plaintext: doc.plaintext,
hashes: {
md5: doc.md5,
sha1: doc.sha1,
sha256: doc.sha256,
sha512: doc.sha512,
}
}]
results: hits.map((hit) => {
const source = hit._source!;
return {
plaintext: source.plaintext,
hashes: {
md5: source.md5,
sha1: source.sha1,
sha256: source.sha256,
sha512: source.sha512,
}
};
})
});
} else {
// Hash not found in database
@@ -59,13 +159,20 @@ export async function POST(request: NextRequest) {
}
} else {
// Query is plaintext - check if it already exists first
const existingDoc = await findByPlaintext(cleanQuery);
const existsResponse = await esClient.search<HashDocument>({
index: INDEX_NAME,
query: {
term: {
'plaintext.keyword': cleanQuery
}
}
});
let hashes;
let wasGenerated = false;
if (existingDoc) {
if (existsResponse.hits.hits.length > 0) {
// Plaintext found, retrieve existing hashes
const existingDoc = existsResponse.hits.hits[0]._source!;
hashes = {
md5: existingDoc.md5,
sha1: existingDoc.sha1,
@@ -73,22 +180,44 @@ export async function POST(request: NextRequest) {
sha512: existingDoc.sha512,
};
} else {
// Plaintext not found, generate and store hashes
hashes = await generateHashes(cleanQuery);
// Plaintext not found, generate hashes and check if any hash already exists
hashes = generateHashes(cleanQuery);
await storeHashDocument({
...hashes,
created_at: new Date().toISOString()
const hashExistsResponse = await esClient.search<HashDocument>({
index: INDEX_NAME,
query: {
bool: {
should: [
{ term: { md5: hashes.md5 } },
{ term: { sha1: hashes.sha1 } },
{ term: { sha256: hashes.sha256 } },
{ term: { sha512: hashes.sha512 } },
],
minimum_should_match: 1
}
}
});
wasGenerated = true;
if (hashExistsResponse.hits.hits.length === 0) {
// No duplicates found, insert new document
await esClient.index({
index: INDEX_NAME,
document: {
...hashes,
created_at: new Date().toISOString()
}
});
// Refresh index to make the document searchable immediately
await esClient.indices.refresh({ index: INDEX_NAME });
}
}
return NextResponse.json({
found: true,
isPlaintext: true,
plaintext: cleanQuery,
wasGenerated,
wasGenerated: existsResponse.hits.hits.length === 0,
hashes: {
md5: hashes.md5,
sha1: hashes.sha1,

View file

@@ -14,8 +14,8 @@ const geistMono = Geist_Mono({
export const metadata: Metadata = {
title: "Hasher - Hash Search & Generator",
description: "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, and SHA512. Powered by Redis.",
keywords: ["hash", "md5", "sha1", "sha256", "sha512", "hash generator", "hash search", "redis"],
description: "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, and SHA512. Powered by Elasticsearch.",
keywords: ["hash", "md5", "sha1", "sha256", "sha512", "hash generator", "hash search", "elasticsearch"],
authors: [{ name: "Hasher" }],
creator: "Hasher",
publisher: "Hasher",

View file

@@ -1,7 +1,8 @@
'use client';
import { useState, useEffect } from 'react';
import { Search, Copy, Check, Hash, Key, AlertCircle, Loader2, Database } from 'lucide-react';
import { useState, useEffect, useCallback, Suspense } from 'react';
import { useSearchParams } from 'next/navigation';
import { Search, Copy, Check, Hash, Key, AlertCircle, Loader2, Database, Link } from 'lucide-react';
interface SearchResult {
found: boolean;
@@ -45,13 +46,62 @@ function formatNumber(num: number): string {
return num.toLocaleString();
}
export default function Home() {
function HasherContent() {
const searchParams = useSearchParams();
const [query, setQuery] = useState('');
const [result, setResult] = useState<SearchResult | null>(null);
const [loading, setLoading] = useState(false);
const [error, setError] = useState('');
const [copiedField, setCopiedField] = useState<string | null>(null);
const [stats, setStats] = useState<IndexStats | null>(null);
const [copiedLink, setCopiedLink] = useState(false);
const [initialLoadDone, setInitialLoadDone] = useState(false);
const performSearch = useCallback(async (searchQuery: string, updateUrl: boolean = true) => {
if (!searchQuery.trim()) return;
setLoading(true);
setError('');
setResult(null);
try {
const response = await fetch('/api/search', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ query: searchQuery.trim() })
});
if (!response.ok) {
throw new Error('Search failed');
}
const data = await response.json();
setResult(data);
// Update URL with search query (using history API to avoid re-triggering effects)
if (updateUrl) {
const newUrl = new URL(window.location.href);
newUrl.searchParams.set('q', searchQuery.trim());
window.history.replaceState(null, '', newUrl.pathname + newUrl.search);
}
} catch (_err) {
setError('Failed to perform search. Please check your connection.');
} finally {
setLoading(false);
}
}, []);
// Load query from URL on mount (only once)
useEffect(() => {
if (initialLoadDone) return;
const urlQuery = searchParams.get('q');
if (urlQuery) {
setQuery(urlQuery);
performSearch(urlQuery, false);
}
setInitialLoadDone(true);
}, [searchParams, performSearch, initialLoadDone]);
useEffect(() => {
const fetchStats = async () => {
@@ -73,30 +123,7 @@ export default function Home() {
const handleSearch = async (e: React.FormEvent) => {
e.preventDefault();
if (!query.trim()) return;
setLoading(true);
setError('');
setResult(null);
try {
const response = await fetch('/api/search', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ query: query.trim() })
});
if (!response.ok) {
throw new Error('Search failed');
}
const data = await response.json();
setResult(data);
} catch (_err) {
setError('Failed to perform search. Please check your connection.');
} finally {
setLoading(false);
}
performSearch(query);
};
const copyToClipboard = (text: string, field: string) => {
@@ -105,6 +132,14 @@ export default function Home() {
setTimeout(() => setCopiedField(null), 2000);
};
const copyShareLink = () => {
const url = new URL(window.location.href);
url.searchParams.set('q', query.trim());
navigator.clipboard.writeText(url.toString());
setCopiedLink(true);
setTimeout(() => setCopiedLink(false), 2000);
};
const HashDisplay = ({ label, value, field }: { label: string; value: string; field: string }) => (
<div className="bg-gray-50 rounded-lg p-4 border border-gray-200">
<div className="flex items-center justify-between mb-2">
@@ -166,19 +201,35 @@ export default function Home() {
value={query}
onChange={(e) => setQuery(e.target.value)}
placeholder="Enter a hash or plaintext..."
className="w-full px-6 py-4 pr-14 text-lg rounded-2xl border-2 border-gray-200 focus:border-blue-500 focus:ring-4 focus:ring-blue-100 outline-none transition-all shadow-sm"
className="w-full px-6 py-4 pr-28 text-lg rounded-2xl border-2 border-gray-200 focus:border-blue-500 focus:ring-4 focus:ring-blue-100 outline-none transition-all shadow-sm"
/>
<button
type="submit"
disabled={loading || !query.trim()}
className="absolute right-2 top-1/2 -translate-y-1/2 bg-gradient-to-r from-blue-600 to-purple-600 text-white p-3 rounded-xl hover:shadow-lg disabled:opacity-50 disabled:cursor-not-allowed transition-all"
>
{loading ? (
<Loader2 className="w-6 h-6 animate-spin" />
) : (
<Search className="w-6 h-6" />
<div className="absolute right-2 top-1/2 -translate-y-1/2 flex gap-1">
{query.trim() && (
<button
type="button"
onClick={copyShareLink}
className="bg-gray-100 text-gray-600 p-3 rounded-xl hover:bg-gray-200 transition-all"
title="Copy share link"
>
{copiedLink ? (
<Check className="w-6 h-6 text-green-600" />
) : (
<Link className="w-6 h-6" />
)}
</button>
)}
</button>
<button
type="submit"
disabled={loading || !query.trim()}
className="bg-gradient-to-r from-blue-600 to-purple-600 text-white p-3 rounded-xl hover:shadow-lg disabled:opacity-50 disabled:cursor-not-allowed transition-all"
>
{loading ? (
<Loader2 className="w-6 h-6 animate-spin" />
) : (
<Search className="w-6 h-6" />
)}
</button>
</div>
</div>
</form>
@@ -308,10 +359,26 @@ export default function Home() {
{/* Footer */}
<footer className="mt-16 text-center text-gray-500 text-sm">
<p>Powered by Redis • Built with Next.js</p>
<p>Powered by Elasticsearch • Built with Next.js</p>
</footer>
</div>
</div>
);
}
function LoadingFallback() {
return (
<div className="min-h-screen bg-gradient-to-br from-blue-50 via-white to-purple-50 flex items-center justify-center">
<Loader2 className="w-12 h-12 text-blue-600 animate-spin" />
</div>
);
}
export default function Home() {
return (
<Suspense fallback={<LoadingFallback />}>
<HasherContent />
</Suspense>
);
}

lib/elasticsearch.ts (new file)
View file

@@ -0,0 +1,76 @@
import { Client } from '@elastic/elasticsearch';
const ELASTICSEARCH_NODE = process.env.ELASTICSEARCH_NODE || 'http://localhost:9200';
const INDEX_NAME = 'hasher';
export const esClient = new Client({
node: ELASTICSEARCH_NODE,
requestTimeout: 30000,
maxRetries: 3,
});
export const INDEX_MAPPING = {
settings: {
number_of_shards: 10,
number_of_replicas: 1,
analysis: {
analyzer: {
lowercase_analyzer: {
type: 'custom' as const,
tokenizer: 'keyword',
filter: ['lowercase']
}
}
}
},
mappings: {
properties: {
plaintext: {
type: 'text' as const,
analyzer: 'lowercase_analyzer',
fields: {
keyword: {
type: 'keyword' as const
}
}
},
md5: {
type: 'keyword' as const
},
sha1: {
type: 'keyword' as const
},
sha256: {
type: 'keyword' as const
},
sha512: {
type: 'keyword' as const
},
created_at: {
type: 'date' as const
}
}
}
};
export async function initializeIndex(): Promise<void> {
try {
const indexExists = await esClient.indices.exists({ index: INDEX_NAME });
if (!indexExists) {
await esClient.indices.create({
index: INDEX_NAME,
settings: INDEX_MAPPING.settings,
mappings: INDEX_MAPPING.mappings
});
console.log(`Index '${INDEX_NAME}' created successfully with 10 shards`);
} else {
console.log(`Index '${INDEX_NAME}' already exists`);
}
} catch (error) {
console.error('Error initializing Elasticsearch index:', error);
throw error;
}
}
export { INDEX_NAME };

View file

@@ -11,7 +11,7 @@ export interface HashResult {
/**
* Generate all common hashes for a given plaintext
*/
export async function generateHashes(plaintext: string): Promise<HashResult> {
export function generateHashes(plaintext: string): HashResult {
return {
plaintext,
md5: crypto.createHash('md5').update(plaintext).digest('hex'),
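`detectHashType`, imported alongside `generateHashes` in the search route, is not included in this diff; a plausible sketch would classify a candidate hash by its hex-digest length (32/40/64/128 characters for md5/sha1/sha256/sha512):
```typescript
// Hypothetical sketch of detectHashType (the real implementation is
// not shown in this diff): a valid hex string is classified by length.
export function detectHashType(input: string): string | null {
  if (!/^[0-9a-f]+$/i.test(input)) return null;
  switch (input.length) {
    case 32: return 'md5';
    case 40: return 'sha1';
    case 64: return 'sha256';
    case 128: return 'sha512';
    default: return null;
  }
}
```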

View file

@@ -1,178 +0,0 @@
import Redis from 'ioredis';
const REDIS_HOST = process.env.REDIS_HOST || 'localhost';
const REDIS_PORT = parseInt(process.env.REDIS_PORT || '6379', 10);
const REDIS_PASSWORD = process.env.REDIS_PASSWORD || undefined;
const REDIS_DB = parseInt(process.env.REDIS_DB || '0', 10);
export const INDEX_NAME = 'hasher';
// Create Redis client with connection pooling
export const redisClient = new Redis({
host: REDIS_HOST,
port: REDIS_PORT,
password: REDIS_PASSWORD,
db: REDIS_DB,
retryStrategy: (times) => {
const delay = Math.min(times * 50, 2000);
return delay;
},
maxRetriesPerRequest: 3,
enableReadyCheck: true,
lazyConnect: false,
});
// Handle connection errors
redisClient.on('error', (err) => {
console.error('Redis Client Error:', err);
});
redisClient.on('connect', () => {
console.log('Redis connected successfully');
});
/**
* Redis Keys Structure:
*
* 1. Hash documents: hash:plaintext:{plaintext} = JSON string
* - Stores all hash data for a plaintext
*
* 2. Hash indexes: hash:index:{algorithm}:{hash} = plaintext
* - Allows reverse lookup from hash to plaintext
* - One key per algorithm (md5, sha1, sha256, sha512)
*
* 3. Statistics: hash:stats = Hash {count, size}
* - count: total number of unique plaintexts
* - size: approximate total size in bytes
*/
export interface HashDocument {
plaintext: string;
md5: string;
sha1: string;
sha256: string;
sha512: string;
created_at: string;
}
/**
* Store a hash document in Redis
*/
export async function storeHashDocument(doc: HashDocument): Promise<void> {
const pipeline = redisClient.pipeline();
// Store main document
const key = `hash:plaintext:${doc.plaintext}`;
pipeline.set(key, JSON.stringify(doc));
// Create indexes for each hash type
pipeline.set(`hash:index:md5:${doc.md5}`, doc.plaintext);
pipeline.set(`hash:index:sha1:${doc.sha1}`, doc.plaintext);
pipeline.set(`hash:index:sha256:${doc.sha256}`, doc.plaintext);
pipeline.set(`hash:index:sha512:${doc.sha512}`, doc.plaintext);
// Update statistics
pipeline.hincrby('hash:stats', 'count', 1);
pipeline.hincrby('hash:stats', 'size', JSON.stringify(doc).length);
await pipeline.exec();
}
/**
* Find a hash document by plaintext
*/
export async function findByPlaintext(plaintext: string): Promise<HashDocument | null> {
const key = `hash:plaintext:${plaintext}`;
const data = await redisClient.get(key);
if (!data) return null;
return JSON.parse(data) as HashDocument;
}
/**
* Find a hash document by any hash value
*/
export async function findByHash(algorithm: string, hash: string): Promise<HashDocument | null> {
const indexKey = `hash:index:${algorithm}:${hash}`;
const plaintext = await redisClient.get(indexKey);
if (!plaintext) return null;
return findByPlaintext(plaintext);
}
/**
* Check if plaintext or any of its hashes exist
*/
export async function checkExistence(plaintext: string, hashes: {
md5: string;
sha1: string;
sha256: string;
sha512: string;
}): Promise<boolean> {
const pipeline = redisClient.pipeline();
pipeline.exists(`hash:plaintext:${plaintext}`);
pipeline.exists(`hash:index:md5:${hashes.md5}`);
pipeline.exists(`hash:index:sha1:${hashes.sha1}`);
pipeline.exists(`hash:index:sha256:${hashes.sha256}`);
pipeline.exists(`hash:index:sha512:${hashes.sha512}`);
const results = await pipeline.exec();
if (!results) return false;
// Check if any key exists
return results.some(([err, value]) => !err && value === 1);
}
/**
* Get index statistics
*/
export async function getStats(): Promise<{ count: number; size: number }> {
const stats = await redisClient.hgetall('hash:stats');
return {
count: parseInt(stats.count || '0', 10),
size: parseInt(stats.size || '0', 10)
};
}
/**
* Initialize Redis (compatibility function, Redis doesn't need explicit initialization)
*/
export async function initializeRedis(): Promise<void> {
// Check connection
await redisClient.ping();
console.log('Redis initialized successfully');
}
/**
* Get Redis info for health check
*/
export async function getRedisInfo(): Promise<{
connected: boolean;
version: string;
usedMemory: number;
dbSize: number;
}> {
const info = await redisClient.info('server');
const memory = await redisClient.info('memory');
const dbSize = await redisClient.dbsize();
// Parse Redis info string
const parseInfo = (infoStr: string, key: string): string => {
const match = infoStr.match(new RegExp(`${key}:(.+)`));
return match ? match[1].trim() : 'unknown';
};
return {
connected: redisClient.status === 'ready',
version: parseInfo(info, 'redis_version'),
usedMemory: parseInt(parseInfo(memory, 'used_memory'), 10) || 0,
dbSize
};
}
export { REDIS_HOST, REDIS_PORT };

View file

@@ -1,14 +1,14 @@
{
"name": "hasher",
"version": "1.0.0",
"description": "A modern hash search and generation tool powered by Redis and Next.js",
"description": "A modern hash search and generation tool powered by Elasticsearch and Next.js",
"keywords": [
"hash",
"md5",
"sha1",
"sha256",
"sha512",
"redis",
"elasticsearch",
"nextjs",
"cryptography",
"security",
@@ -38,7 +38,7 @@
"remove-duplicates": "tsx scripts/remove-duplicates.ts"
},
"dependencies": {
"ioredis": "^5.4.2",
"@elastic/elasticsearch": "^9.2.0",
"lucide-react": "^0.555.0",
"next": "15.4.8",
"react": "19.1.2",

View file

@@ -4,7 +4,7 @@
* Hasher Indexer Script
*
* This script reads a text file with one word/phrase per line and indexes
* all the generated hashes into Redis.
* all the generated hashes into Elasticsearch.
*
* Usage:
* npx tsx scripts/index-file.ts <path-to-file.txt> [options]
@@ -19,16 +19,13 @@
* --help, -h Show this help message
*/
import Redis from 'ioredis';
import { Client } from '@elastic/elasticsearch';
import { createReadStream, existsSync, readFileSync, writeFileSync, unlinkSync } from 'fs';
import { resolve, basename } from 'path';
import { createInterface } from 'readline';
import crypto from 'crypto';
const REDIS_HOST = process.env.REDIS_HOST || 'localhost';
const REDIS_PORT = parseInt(process.env.REDIS_PORT || '6379', 10);
const REDIS_PASSWORD = process.env.REDIS_PASSWORD || undefined;
const REDIS_DB = parseInt(process.env.REDIS_DB || '0', 10);
const ELASTICSEARCH_NODE = process.env.ELASTICSEARCH_NODE || 'http://localhost:9200';
const INDEX_NAME = 'hasher';
const DEFAULT_BATCH_SIZE = 100;
@@ -159,7 +156,7 @@ function deleteState(stateFile: string): void {
}
}
async function generateHashes(plaintext: string): Promise<HashDocument> {
function generateHashes(plaintext: string): HashDocument {
return {
plaintext,
md5: crypto.createHash('md5').update(plaintext).digest('hex'),
@@ -188,10 +185,7 @@ Options:
--help, -h Show this help message
Environment Variables:
REDIS_HOST Redis host (default: localhost)
REDIS_PORT Redis port (default: 6379)
REDIS_PASSWORD Redis password (optional)
REDIS_DB Redis database number (default: 0)
ELASTICSEARCH_NODE Elasticsearch node URL (default: http://localhost:9200)
Examples:
npx tsx scripts/index-file.ts wordlist.txt
@@ -215,14 +209,7 @@ Duplicate Checking:
}
async function indexFile(filePath: string, batchSize: number, shouldResume: boolean, checkDuplicates: boolean, customStateFile: string | null) {
const client = new Redis({
host: REDIS_HOST,
port: REDIS_PORT,
password: REDIS_PASSWORD,
db: REDIS_DB,
retryStrategy: (times) => Math.min(times * 50, 2000),
});
const client = new Client({ node: ELASTICSEARCH_NODE });
const absolutePath = resolve(filePath);
const stateFile = customStateFile || getDefaultStateFile(absolutePath);
const fileHash = getFileHash(absolutePath);
@@ -260,7 +247,7 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
console.log(`📚 Hasher Indexer`);
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
console.log(`Redis: ${REDIS_HOST}:${REDIS_PORT} (DB ${REDIS_DB})`);
console.log(`Elasticsearch: ${ELASTICSEARCH_NODE}`);
console.log(`Index: ${INDEX_NAME}`);
console.log(`File: ${filePath}`);
console.log(`Batch size: ${batchSize}`);
@@ -294,8 +281,8 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
try {
// Test connection
console.log('🔗 Connecting to Redis...');
await client.ping();
console.log('🔗 Connecting to Elasticsearch...');
await client.cluster.health({});
console.log('✅ Connected successfully\n');
// Process file line by line using streams
@@ -318,90 +305,100 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
if (batch.length === 0) return;
if (isInterrupted) return;
// Generate hashes for all items in batch first
const batchWithHashes = await Promise.all(
batch.map(async (plaintext: string) => ({
plaintext,
hashes: await generateHashes(plaintext)
}))
);
const pipeline = client.pipeline();
let toIndex: typeof batchWithHashes = [];
const bulkOperations: any[] = [];
// Generate hashes for all items in batch first
const batchWithHashes = batch.map((plaintext: string) => ({
plaintext,
hashes: generateHashes(plaintext)
}));
if (checkDuplicates) {
// Check which items already exist
const existenceChecks = await Promise.all(
batchWithHashes.map(async (item) => {
const plaintextExists = await client.exists(`hash:plaintext:${item.plaintext}`);
if (plaintextExists) return { item, exists: true };
// Check if any hash exists
const md5Exists = await client.exists(`hash:index:md5:${item.hashes.md5}`);
const sha1Exists = await client.exists(`hash:index:sha1:${item.hashes.sha1}`);
const sha256Exists = await client.exists(`hash:index:sha256:${item.hashes.sha256}`);
const sha512Exists = await client.exists(`hash:index:sha512:${item.hashes.sha512}`);
return {
item,
exists: md5Exists || sha1Exists || sha256Exists || sha512Exists
};
})
);
for (const check of existenceChecks) {
if (check.exists) {
// Check which items already exist (by plaintext or any hash)
const md5List = batchWithHashes.map((item: any) => item.hashes.md5);
const sha1List = batchWithHashes.map((item: any) => item.hashes.sha1);
const sha256List = batchWithHashes.map((item: any) => item.hashes.sha256);
const sha512List = batchWithHashes.map((item: any) => item.hashes.sha512);
const existingCheck = await client.search({
index: INDEX_NAME,
size: batchSize * 5,
query: {
bool: {
should: [
{ terms: { 'plaintext.keyword': batch } },
{ terms: { md5: md5List } },
{ terms: { sha1: sha1List } },
{ terms: { sha256: sha256List } },
{ terms: { sha512: sha512List } },
],
minimum_should_match: 1
}
},
_source: ['plaintext', 'md5', 'sha1', 'sha256', 'sha512']
});
// Create a set of existing hashes for quick lookup
const existingHashes = new Set<string>();
existingCheck.hits.hits.forEach((hit: any) => {
const src = hit._source;
existingHashes.add(src.plaintext);
existingHashes.add(src.md5);
existingHashes.add(src.sha1);
existingHashes.add(src.sha256);
existingHashes.add(src.sha512);
});
// Prepare bulk operations only for items that don't have any duplicate hash
for (const item of batchWithHashes) {
const isDuplicate =
existingHashes.has(item.plaintext) ||
existingHashes.has(item.hashes.md5) ||
existingHashes.has(item.hashes.sha1) ||
existingHashes.has(item.hashes.sha256) ||
existingHashes.has(item.hashes.sha512);
if (!isDuplicate) {
bulkOperations.push({ index: { _index: INDEX_NAME } });
bulkOperations.push(item.hashes);
} else {
state.skipped++;
sessionSkipped++;
} else {
toIndex.push(check.item);
}
}
} else {
// No duplicate checking - index everything
toIndex = batchWithHashes;
for (const item of batchWithHashes) {
bulkOperations.push({ index: { _index: INDEX_NAME } });
bulkOperations.push(item.hashes);
}
}
// Execute bulk operations
if (toIndex.length > 0) {
// Execute bulk operation only if there are new items to insert
if (bulkOperations.length > 0) {
try {
for (const item of toIndex) {
const doc = item.hashes;
const key = `hash:plaintext:${doc.plaintext}`;
// Store main document
pipeline.set(key, JSON.stringify(doc));
// Create indexes for each hash type
pipeline.set(`hash:index:md5:${doc.md5}`, doc.plaintext);
pipeline.set(`hash:index:sha1:${doc.sha1}`, doc.plaintext);
pipeline.set(`hash:index:sha256:${doc.sha256}`, doc.plaintext);
pipeline.set(`hash:index:sha512:${doc.sha512}`, doc.plaintext);
// Update statistics
pipeline.hincrby('hash:stats', 'count', 1);
pipeline.hincrby('hash:stats', 'size', JSON.stringify(doc).length);
}
const results = await pipeline.exec();
// Count errors
const errorCount = results?.filter(([err]) => err !== null).length || 0;
if (errorCount > 0) {
const bulkResponse = await client.bulk({
operations: bulkOperations,
refresh: false
});
if (bulkResponse.errors) {
const errorCount = bulkResponse.items.filter((item: any) => item.index?.error).length;
state.errors += errorCount;
sessionErrors += errorCount;
const successCount = toIndex.length - errorCount;
const successCount = (bulkOperations.length / 2) - errorCount;
state.indexed += successCount;
sessionIndexed += successCount;
} else {
state.indexed += toIndex.length;
sessionIndexed += toIndex.length;
const count = bulkOperations.length / 2;
state.indexed += count;
sessionIndexed += count;
}
} catch (error) {
console.error(`\n❌ Error processing batch:`, error);
state.errors += toIndex.length;
sessionErrors += toIndex.length;
const count = bulkOperations.length / 2;
state.errors += count;
sessionErrors += count;
}
}
@@ -453,8 +450,9 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
return;
}
// No refresh needed for Redis
console.log('\n\n✅ All data persisted to Redis');
// Refresh index
console.log('\n\n🔄 Refreshing index...');
await client.indices.refresh({ index: INDEX_NAME });
// Delete state file on successful completion
deleteState(stateFile);
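
The bulk indexing above relies on the Elasticsearch bulk API convention that each document contributes two entries to the operations array: an action entry (`{ index: { _index } }`) followed by the document source. That is why the success and error accounting divides `bulkOperations.length` by 2. A minimal standalone sketch of the same request shape — the index name and field values here are illustrative:

```typescript
import { Client } from '@elastic/elasticsearch';

const client = new Client({ node: process.env.ELASTICSEARCH_NODE || 'http://localhost:9200' });

// Two array entries per document: action metadata, then the source.
const operations = [
  { index: { _index: 'hasher' } },
  { plaintext: 'hello', md5: '5d41402abc4b2a76b9719d911017c592' },
];

const response = await client.bulk({ operations, refresh: false });
if (response.errors) {
  // bulk does not throw on per-document failures; inspect items instead.
  const failed = response.items.filter((item: any) => item.index?.error);
  console.error(`${failed.length} document(s) failed to index`);
}
```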

View file

@@ -3,7 +3,7 @@
/**
* Hasher Duplicate Remover Script
*
* This script finds and removes duplicate entries from Redis.
* This script finds and removes duplicate entries from the Elasticsearch index.
* It identifies duplicates by checking plaintext, md5, sha1, sha256, and sha512 fields.
*
* Usage:
@@ -13,20 +13,20 @@
* Options:
* --dry-run Show duplicates without removing them (default)
* --execute Actually remove the duplicates
* --batch-size=<number> Number of items to process in each batch (default: 1000)
* --field=<field> Check duplicates only on this field (plaintext, md5, sha1, sha256, sha512)
* --help, -h Show this help message
*/
import Redis from 'ioredis';
import { Client } from '@elastic/elasticsearch';
const REDIS_HOST = process.env.REDIS_HOST || 'localhost';
const REDIS_PORT = parseInt(process.env.REDIS_PORT || '6379', 10);
const REDIS_PASSWORD = process.env.REDIS_PASSWORD || undefined;
const REDIS_DB = parseInt(process.env.REDIS_DB || '0', 10);
const ELASTICSEARCH_NODE = process.env.ELASTICSEARCH_NODE || 'http://localhost:9200';
const INDEX_NAME = 'hasher';
const DEFAULT_BATCH_SIZE = 1000;
interface ParsedArgs {
dryRun: boolean;
batchSize: number;
field: string | null;
showHelp: boolean;
}
@@ -34,23 +34,15 @@ interface ParsedArgs {
interface DuplicateGroup {
value: string;
field: string;
plaintexts: string[];
keepPlaintext: string;
deletePlaintexts: string[];
}
interface HashDocument {
plaintext: string;
md5: string;
sha1: string;
sha256: string;
sha512: string;
created_at: string;
documentIds: string[];
keepId: string;
deleteIds: string[];
}
function parseArgs(args: string[]): ParsedArgs {
const result: ParsedArgs = {
dryRun: true,
batchSize: DEFAULT_BATCH_SIZE,
field: null,
showHelp: false
};
@@ -64,6 +56,21 @@ function parseArgs(args: string[]): ParsedArgs {
result.dryRun = true;
} else if (arg === '--execute') {
result.dryRun = false;
} else if (arg.startsWith('--batch-size=')) {
const value = arg.split('=')[1];
const parsed = parseInt(value, 10);
if (!isNaN(parsed) && parsed > 0) {
result.batchSize = parsed;
}
} else if (arg === '--batch-size') {
const nextArg = args[i + 1];
if (nextArg && !nextArg.startsWith('-')) {
const parsed = parseInt(nextArg, 10);
if (!isNaN(parsed) && parsed > 0) {
result.batchSize = parsed;
i++;
}
}
} else if (arg.startsWith('--field=')) {
result.field = arg.split('=')[1];
} else if (arg === '--field') {
@@ -89,15 +96,13 @@ Usage:
Options:
--dry-run Show duplicates without removing them (default)
--execute Actually remove the duplicates
--batch-size=<number> Number of items to process in each batch (default: 1000)
--field=<field> Check duplicates only on this field
Valid fields: plaintext, md5, sha1, sha256, sha512
--help, -h Show this help message
Environment Variables:
REDIS_HOST Redis host (default: localhost)
REDIS_PORT Redis port (default: 6379)
REDIS_PASSWORD Redis password (optional)
REDIS_DB Redis database number (default: 0)
ELASTICSEARCH_NODE Elasticsearch node URL (default: http://localhost:9200)
Examples:
npx tsx scripts/remove-duplicates.ts # Dry run, show all duplicates
@@ -114,137 +119,275 @@ Notes:
}
async function findDuplicatesForField(
client: Redis,
field: string
client: Client,
field: string,
batchSize: number
): Promise<DuplicateGroup[]> {
const duplicates: DuplicateGroup[] = [];
// Get all keys for this field type
const pattern = field === 'plaintext'
? 'hash:plaintext:*'
: `hash:index:${field}:*`;
const keys = await client.keys(pattern);
console.log(` Scanning for duplicates...`);
console.log(` Scanning for ${field} duplicates...`);
// Use aggregation to find duplicate values
const fieldToAggregate = field === 'plaintext' ? 'plaintext.keyword' : field;
// Use composite aggregation to handle large number of duplicates
let afterKey: any = undefined;
let hasMore = true;
// For hash indexes, group by hash value (not plaintext)
const valueMap = new Map<string, string[]>();
if (field === 'plaintext') {
// Each key is already unique for plaintext
// Check for same plaintext with different created_at
for (const key of keys) {
const plaintext = key.replace('hash:plaintext:', '');
if (!valueMap.has(plaintext)) {
valueMap.set(plaintext, []);
}
valueMap.get(plaintext)!.push(plaintext);
}
} else {
// For hash fields, get the plaintext and check if multiple plaintexts have same hash
for (const key of keys) {
const hashValue = key.replace(`hash:index:${field}:`, '');
const plaintext = await client.get(key);
if (plaintext) {
if (!valueMap.has(hashValue)) {
valueMap.set(hashValue, []);
}
valueMap.get(hashValue)!.push(plaintext);
}
}
}
// Find groups with duplicates
for (const [value, plaintexts] of valueMap) {
const uniquePlaintexts = Array.from(new Set(plaintexts));
if (uniquePlaintexts.length > 1) {
// Get documents to compare timestamps
const docs: { plaintext: string; doc: HashDocument }[] = [];
for (const plaintext of uniquePlaintexts) {
const docKey = `hash:plaintext:${plaintext}`;
const docData = await client.get(docKey);
if (docData) {
docs.push({ plaintext, doc: JSON.parse(docData) });
// Sort by created_at (oldest first)
docs.sort((a, b) =>
new Date(a.doc.created_at).getTime() - new Date(b.doc.created_at).getTime()
);
if (docs.length > 1) {
duplicates.push({
value,
field,
plaintexts: docs.map(d => d.plaintext),
keepPlaintext: docs[0].plaintext,
deletePlaintexts: docs.slice(1).map(d => d.plaintext)
while (hasMore) {
const aggQuery: any = {
index: INDEX_NAME,
size: 0,
aggs: {
duplicates: {
composite: {
size: batchSize,
sources: [
{ value: { terms: { field: fieldToAggregate } } }
],
...(afterKey && { after: afterKey })
},
aggs: {
doc_count_filter: {
bucket_selector: {
buckets_path: { count: '_count' },
script: 'params.count > 1'
}
}
}
}
}
};
const response = await client.search(aggQuery);
const compositeAgg = response.aggregations?.duplicates as any;
const buckets = compositeAgg?.buckets || [];
for (const bucket of buckets) {
if (bucket.doc_count > 1) {
const value = bucket.key.value;
// Use scroll API for large result sets
const documentIds: string[] = [];
let scrollResponse = await client.search({
index: INDEX_NAME,
scroll: '1m',
size: 1000,
query: {
term: {
[fieldToAggregate]: value
}
},
sort: [
{ created_at: { order: 'asc' } }
],
_source: false
});
while (scrollResponse.hits.hits.length > 0) {
documentIds.push(...scrollResponse.hits.hits.map((hit: any) => hit._id));
if (!scrollResponse._scroll_id) break;
scrollResponse = await client.scroll({
scroll_id: scrollResponse._scroll_id,
scroll: '1m'
});
}
// Clear scroll
if (scrollResponse._scroll_id) {
await client.clearScroll({ scroll_id: scrollResponse._scroll_id }).catch(() => {});
}
if (documentIds.length > 1) {
duplicates.push({
value: String(value),
field,
documentIds,
keepId: documentIds[0], // Keep the oldest
deleteIds: documentIds.slice(1) // Delete the rest
});
}
}
}
// Check if there are more results
afterKey = compositeAgg?.after_key;
hasMore = buckets.length === batchSize && afterKey;
if (hasMore) {
process.stdout.write(`\r Found ${duplicates.length} duplicate groups so far...`);
}
}
return duplicates;
}
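
A note on the query shape above: a plain `terms` aggregation caps how many buckets it returns, so the script uses a `composite` aggregation, which pages through all distinct values via `after_key`, together with a `bucket_selector` sub-aggregation that keeps only buckets holding more than one document. Reduced to a standalone request for a single field — the function name and the hard-coded `md5` field are illustrative:

```typescript
import { Client } from '@elastic/elasticsearch';

// Sketch: fetch one page of duplicate md5 values from the 'hasher' index.
async function duplicateMd5Page(client: Client, after?: Record<string, unknown>) {
  const response = await client.search({
    index: 'hasher',
    size: 0,
    aggs: {
      duplicates: {
        composite: {
          size: 1000,
          sources: [{ value: { terms: { field: 'md5' } } }],
          ...(after && { after }),
        },
        aggs: {
          doc_count_filter: {
            bucket_selector: {
              buckets_path: { count: '_count' },
              script: 'params.count > 1',
            },
          },
        },
      },
    },
  });
  const agg = (response.aggregations as any)?.duplicates;
  // Re-invoke with afterKey until it comes back undefined.
  return { buckets: agg?.buckets ?? [], afterKey: agg?.after_key };
}
```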
async function removeDuplicates(parsedArgs: ParsedArgs) {
const client = new Redis({
host: REDIS_HOST,
port: REDIS_PORT,
password: REDIS_PASSWORD,
db: REDIS_DB,
});
/**
* Phase 1: Initialize and connect to Elasticsearch
*/
async function phase1_InitAndConnect() {
console.log(`🔍 Hasher Duplicate Remover - Phase 1: Initialization`);
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
console.log(`Elasticsearch: ${ELASTICSEARCH_NODE}`);
console.log(`Index: ${INDEX_NAME}`);
console.log('');
const client = new Client({ node: ELASTICSEARCH_NODE });
console.log('🔗 Connecting to Elasticsearch...');
await client.cluster.health({});
console.log('✅ Connected successfully\n');
const countResponse = await client.count({ index: INDEX_NAME });
console.log(`📊 Total documents in index: ${countResponse.count}\n`);
return { client, totalDocuments: countResponse.count };
}
/**
* Phase 2: Find duplicates for a specific field
*/
async function phase2_FindDuplicatesForField(
client: Client,
field: string,
batchSize: number,
seenDeleteIds: Set<string>
): Promise<{ duplicates: DuplicateGroup[], totalFound: number }> {
console.log(`\n🔍 Phase 2: Checking duplicates for field: ${field}...`);
const fieldDuplicates = await findDuplicatesForField(client, field, batchSize);
const duplicates: DuplicateGroup[] = [];
// Filter out already seen delete IDs to avoid counting the same document multiple times
for (const dup of fieldDuplicates) {
const newDeleteIds = dup.deleteIds.filter(id => !seenDeleteIds.has(id));
if (newDeleteIds.length > 0) {
dup.deleteIds = newDeleteIds;
newDeleteIds.forEach(id => seenDeleteIds.add(id));
duplicates.push(dup);
}
}
console.log(` Found ${fieldDuplicates.length} duplicate groups for ${field}`);
console.log(` New unique documents to delete: ${duplicates.reduce((sum, dup) => sum + dup.deleteIds.length, 0)}`);
// Force garbage collection if available
if (global.gc) {
global.gc();
console.log(` ♻️ Memory freed after processing ${field}`);
}
return { duplicates, totalFound: fieldDuplicates.length };
}
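
One operational detail behind the `global.gc` calls in this script: V8 only exposes `gc()` when Node is started with the `--expose-gc` flag; otherwise `global.gc` is `undefined` and the guarded blocks are silent no-ops. A small sketch of opting in — the `--import tsx` loader invocation is an assumption, so adjust it to however the script is actually launched:

```typescript
// Run with: node --expose-gc --import tsx scripts/remove-duplicates.ts --execute
if (typeof global.gc === 'function') {
  global.gc(); // only available behind --expose-gc
}
```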
/**
* Phase 3: Process deletion for a batch of duplicates
*/
async function phase3_DeleteBatch(
client: Client,
deleteIds: string[],
batchSize: number,
startIndex: number
): Promise<{ deleted: number, errors: number }> {
const batch = deleteIds.slice(startIndex, startIndex + batchSize);
let deleted = 0;
let errors = 0;
try {
const bulkOperations = batch.flatMap(id => [
{ delete: { _index: INDEX_NAME, _id: id } }
]);
const bulkResponse = await client.bulk({
operations: bulkOperations,
refresh: false
});
if (bulkResponse.errors) {
const errorCount = bulkResponse.items.filter((item: any) => item.delete?.error).length;
errors += errorCount;
deleted += batch.length - errorCount;
} else {
deleted += batch.length;
}
} catch (error) {
console.error(`\n❌ Error deleting batch:`, error);
errors += batch.length;
}
// Force garbage collection if available
if (global.gc) {
global.gc();
}
return { deleted, errors };
}
/**
* Phase 4: Finalize and report results
*/
async function phase4_Finalize(
client: Client,
totalDeleted: number,
totalErrors: number,
initialDocumentCount: number
) {
console.log('\n\n🔄 Phase 4: Refreshing index...');
await client.indices.refresh({ index: INDEX_NAME });
const newCountResponse = await client.count({ index: INDEX_NAME });
console.log('\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
console.log('✅ Duplicate removal complete!');
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
console.log(`Documents deleted: ${totalDeleted}`);
console.log(`Errors: ${totalErrors}`);
console.log(`Previous document count: ${initialDocumentCount}`);
console.log(`New document count: ${newCountResponse.count}`);
console.log('');
}
async function removeDuplicates(parsedArgs: ParsedArgs) {
const fields = parsedArgs.field
? [parsedArgs.field]
: ['md5', 'sha1', 'sha256', 'sha512'];
: ['plaintext', 'md5', 'sha1', 'sha256', 'sha512'];
console.log(`🔍 Hasher Duplicate Remover`);
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
console.log(`Redis: ${REDIS_HOST}:${REDIS_PORT} (DB ${REDIS_DB})`);
console.log(`Index: ${INDEX_NAME}`);
console.log(`Mode: ${parsedArgs.dryRun ? '🔎 DRY RUN (no changes)' : '⚠️ EXECUTE (will delete)'}`);
console.log(`Batch size: ${parsedArgs.batchSize}`);
console.log(`Fields to check: ${fields.join(', ')}`);
console.log('');
try {
// Test connection
console.log('🔗 Connecting to Redis...');
await client.ping();
console.log('✅ Connected successfully\n');
// Get index stats
const stats = await client.hgetall('hash:stats');
const totalCount = parseInt(stats.count || '0', 10);
console.log(`📊 Total documents in index: ${totalCount}\n`);
const allDuplicates: DuplicateGroup[] = [];
const seenPlaintexts = new Set<string>();
// Find duplicates for each field
for (const field of fields) {
console.log(`🔍 Checking duplicates for field: ${field}...`);
const fieldDuplicates = await findDuplicatesForField(client, field);
// Filter out already seen plaintexts
for (const dup of fieldDuplicates) {
const newDeletePlaintexts = dup.deletePlaintexts.filter(p => !seenPlaintexts.has(p));
if (newDeletePlaintexts.length > 0) {
dup.deletePlaintexts = newDeletePlaintexts;
newDeletePlaintexts.forEach(p => seenPlaintexts.add(p));
allDuplicates.push(dup);
}
}
console.log(` Found ${fieldDuplicates.length} duplicate groups for ${field}`);
const totalToDelete = allDuplicates.reduce((sum, dup) => sum + dup.deletePlaintexts.length, 0);
// === PHASE 1: Initialize ===
const { client, totalDocuments } = await phase1_InitAndConnect();
// Force garbage collection after phase 1
if (global.gc) {
global.gc();
console.log('♻️ Memory freed after initialization\n');
}
// === PHASE 2: Find duplicates field by field ===
const allDuplicates: DuplicateGroup[] = [];
const seenDeleteIds = new Set<string>();
for (const field of fields) {
const { duplicates } = await phase2_FindDuplicatesForField(
client,
field,
parsedArgs.batchSize,
seenDeleteIds
);
allDuplicates.push(...duplicates);
// Clear field duplicates to free memory
duplicates.length = 0;
}
const totalToDelete = allDuplicates.reduce((sum, dup) => sum + dup.deleteIds.length, 0);
console.log(`\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
console.log(`📋 Summary:`);
@@ -254,7 +397,6 @@ async function removeDuplicates(parsedArgs: ParsedArgs) {
if (allDuplicates.length === 0) {
console.log('✨ No duplicates found! Index is clean.\n');
await client.quit();
return;
}
@@ -267,8 +409,8 @@ async function removeDuplicates(parsedArgs: ParsedArgs) {
: dup.value;
console.log(` Field: ${dup.field}`);
console.log(` Value: ${truncatedValue}`);
console.log(` Keep: ${dup.keepPlaintext}`);
console.log(` Delete: ${dup.deletePlaintexts.length} document(s)`);
console.log(` Keep: ${dup.keepId}`);
console.log(` Delete: ${dup.deleteIds.length} document(s)`);
console.log('');
}
@@ -281,70 +423,44 @@ async function removeDuplicates(parsedArgs: ParsedArgs) {
console.log(`🔎 DRY RUN - No changes made`);
console.log(` Run with --execute to remove ${totalToDelete} duplicate documents`);
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n`);
await client.quit();
return;
}
// Execute deletion
console.log(`\n🗑 Removing ${totalToDelete} duplicate documents...\n`);
let deleted = 0;
let errors = 0;
for (const dup of allDuplicates) {
for (const plaintext of dup.deletePlaintexts) {
try {
const docKey = `hash:plaintext:${plaintext}`;
const docData = await client.get(docKey);
if (docData) {
const doc: HashDocument = JSON.parse(docData);
const pipeline = client.pipeline();
// Delete main document
pipeline.del(docKey);
// Delete all indexes
pipeline.del(`hash:index:md5:${doc.md5}`);
pipeline.del(`hash:index:sha1:${doc.sha1}`);
pipeline.del(`hash:index:sha256:${doc.sha256}`);
pipeline.del(`hash:index:sha512:${doc.sha512}`);
// Update statistics
pipeline.hincrby('hash:stats', 'count', -1);
pipeline.hincrby('hash:stats', 'size', -JSON.stringify(doc).length);
const results = await pipeline.exec();
if (results && results.some(([err]) => err !== null)) {
errors++;
} else {
deleted++;
}
}
process.stdout.write(`\r⏳ Progress: ${deleted + errors}/${totalToDelete} - Deleted: ${deleted}, Errors: ${errors}`);
} catch (error) {
console.error(`\n❌ Error deleting ${plaintext}:`, error);
errors++;
}
}
// === PHASE 3: Execute deletion in batches ===
console.log(`\n🗑 Phase 3: Removing ${totalToDelete} duplicate documents...\n`);
let totalDeleted = 0;
let totalErrors = 0;
const deleteIds = allDuplicates.flatMap(dup => dup.deleteIds);
// Clear allDuplicates to free memory
allDuplicates.length = 0;
// Delete in batches with memory management
for (let i = 0; i < deleteIds.length; i += parsedArgs.batchSize) {
const { deleted, errors } = await phase3_DeleteBatch(
client,
deleteIds,
parsedArgs.batchSize,
i
);
totalDeleted += deleted;
totalErrors += errors;
process.stdout.write(
`\r⏳ Progress: ${Math.min(i + parsedArgs.batchSize, deleteIds.length)}/${deleteIds.length} - ` +
`Deleted: ${totalDeleted}, Errors: ${totalErrors}`
);
}
// Get new count
const newStats = await client.hgetall('hash:stats');
const newCount = parseInt(newStats.count || '0', 10);
console.log('\n\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
console.log('✅ Duplicate removal complete!');
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
console.log(`Documents deleted: ${deleted}`);
console.log(`Errors: ${errors}`);
console.log(`Previous document count: ${totalCount}`);
console.log(`New document count: ${newCount}`);
console.log('');
// Clear deleteIds to free memory
deleteIds.length = 0;
seenDeleteIds.clear();
// === PHASE 4: Finalize ===
await phase4_Finalize(client, totalDeleted, totalErrors, totalDocuments);
await client.quit();
} catch (error) {
console.error('\n❌ Error:', error instanceof Error ? error.message : error);
process.exit(1);
@@ -369,10 +485,11 @@ if (parsedArgs.field && !validFields.includes(parsedArgs.field)) {
console.log(`\n🔧 Configuration:`);
console.log(` Mode: ${parsedArgs.dryRun ? 'dry-run' : 'execute'}`);
console.log(` Batch size: ${parsedArgs.batchSize}`);
if (parsedArgs.field) {
console.log(` Field: ${parsedArgs.field}`);
} else {
console.log(` Fields: all (md5, sha1, sha256, sha512)`);
console.log(` Fields: all (plaintext, md5, sha1, sha256, sha512)`);
}
console.log('');
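
For reference, the per-value ID collection in `findDuplicatesForField` pages through matching documents with the scroll API. A standalone sketch of that loop — the function name and the hard-coded `md5` field are illustrative:

```typescript
import { Client } from '@elastic/elasticsearch';

const client = new Client({ node: process.env.ELASTICSEARCH_NODE || 'http://localhost:9200' });

// Collect the IDs of every document whose md5 matches `value`, oldest first.
async function collectIds(value: string): Promise<string[]> {
  const ids: string[] = [];
  let page = await client.search({
    index: 'hasher',
    scroll: '1m',
    size: 1000,
    query: { term: { md5: value } },
    sort: [{ created_at: { order: 'asc' } }],
    _source: false,
  });
  while (page.hits.hits.length > 0) {
    ids.push(...page.hits.hits.map((hit: any) => hit._id as string));
    if (!page._scroll_id) break;
    page = await client.scroll({ scroll_id: page._scroll_id, scroll: '1m' });
  }
  // Release server-side scroll resources once done.
  if (page._scroll_id) {
    await client.clearScroll({ scroll_id: page._scroll_id }).catch(() => {});
  }
  return ids;
}
```

Elasticsearch's documentation now steers deep pagination toward point-in-time plus `search_after` rather than scroll, which may be worth considering if the index keeps growing.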