Added raw request handling, enhanced attack detection for GET and POST requests, templatized suspicious activity to fetch from wordlists.json, aligned Helm to load the new wordlist config, added migration scripts from 1.0.0 to new Krawl versions, removed old and unused functions, added test scripts
This commit is contained in:
60
src/migrations/README.md
Normal file
60
src/migrations/README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
# Database Migrations
|
||||
|
||||
This directory contains database migration scripts for Krawl.
|
||||
Since the 1.0.0 stable release, we have added features that require schema changes and performance optimizations. These migration scripts ensure that existing users can upgrade seamlessly, without data loss or downtime.
|
||||
|
||||
## Available Migrations
|
||||
|
||||
### add_raw_request_column.py
|
||||
|
||||
Adds the `raw_request` column to the `access_logs` table to store complete HTTP requests for forensic analysis.
|
||||
|
||||
**Usage:**
|
||||
```bash
|
||||
# Run with default database path (src/data/krawl.db)
|
||||
python3 migrations/add_raw_request_column.py
|
||||
|
||||
# Run with custom database path
|
||||
python3 migrations/add_raw_request_column.py /path/to/krawl.db
|
||||
```
|
||||
|
||||
### add_performance_indexes.py
|
||||
|
||||
Adds critical performance indexes to the `attack_detections` table for efficient aggregation and filtering with large datasets (100k+ records).
|
||||
|
||||
**Indexes Added:**
|
||||
- `ix_attack_detections_attack_type` - Speeds up GROUP BY on attack_type
|
||||
- `ix_attack_detections_type_log` - Composite index for attack_type + access_log_id
|
||||
|
||||
**Usage:**
|
||||
```bash
|
||||
# Run with default database path (src/data/krawl.db)
|
||||
python3 migrations/add_performance_indexes.py
|
||||
|
||||
# Run with custom database path
|
||||
python3 migrations/add_performance_indexes.py /path/to/krawl.db
|
||||
```
|
||||
|
||||
**Post-Migration Optimization:**
|
||||
```bash
|
||||
# Compact database and update query planner statistics
|
||||
sqlite3 /path/to/krawl.db "VACUUM; ANALYZE;"
|
||||
```
|
||||
|
||||
## Running Migrations
|
||||
|
||||
All migration scripts are designed to be idempotent and safe to run multiple times. They will:
|
||||
1. Check if the migration is already applied
|
||||
2. Skip if already applied
|
||||
3. Apply the migration if needed
|
||||
4. Report the result
|
||||
|
||||
## Creating New Migrations
|
||||
|
||||
When creating a new migration:
|
||||
1. Name the file descriptively: `action_description.py`
|
||||
2. Make it idempotent (safe to run multiple times)
|
||||
3. Add checks before making changes
|
||||
4. Provide clear error messages
|
||||
5. Support custom database paths via command line
|
||||
6. Update this README with usage instructions
|
||||
120
src/migrations/add_performance_indexes.py
Normal file
120
src/migrations/add_performance_indexes.py
Normal file
@@ -0,0 +1,120 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Migration script to add performance indexes to attack_detections table.
|
||||
This dramatically improves query performance with large datasets (100k+ records).
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import sys
|
||||
import os
|
||||
|
||||
|
||||
def index_exists(cursor, index_name: str) -> bool:
    """Report whether ``sqlite_master`` lists an index called *index_name*.

    Args:
        cursor: Open SQLite cursor used to run the lookup query.
        index_name: Name of the index to look for.

    Returns:
        True when a matching index row is found, False otherwise.
    """
    lookup_sql = "SELECT name FROM sqlite_master WHERE type='index' AND name=?"
    cursor.execute(lookup_sql, (index_name,))
    matching_row = cursor.fetchone()
    if matching_row is None:
        return False
    return True
|
||||
|
||||
|
||||
def add_performance_indexes(db_path: str) -> bool:
    """
    Add performance indexes to the attack_detections table.

    Each index is created only when ``index_exists`` reports it missing, so
    re-running the function on an already-migrated database just reports
    the indexes as existing.

    Args:
        db_path: Path to the SQLite database file

    Returns:
        True if indexes were added or already exist, False on error
        (missing database file, SQLite error, or any other exception)
    """
    # (index name, progress message, DDL statement) for every index to ensure.
    index_specs = (
        (
            "ix_attack_detections_attack_type",
            "Adding index on attack_detections.attack_type...",
            """
            CREATE INDEX ix_attack_detections_attack_type
            ON attack_detections(attack_type)
            """,
        ),
        (
            "ix_attack_detections_type_log",
            "Adding composite index on attack_detections(attack_type, access_log_id)...",
            """
            CREATE INDEX ix_attack_detections_type_log
            ON attack_detections(attack_type, access_log_id)
            """,
        ),
    )

    try:
        # Check up front: connecting to a missing path would create a new,
        # empty database file instead of failing.
        if not os.path.exists(db_path):
            print(f"Database file not found: {db_path}")
            return False

        indexes_added = []
        indexes_existed = []

        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            for index_name, progress_message, create_sql in index_specs:
                if index_exists(cursor, index_name):
                    indexes_existed.append(index_name)
                    continue
                print(progress_message)
                cursor.execute(create_sql)
                indexes_added.append(index_name)
            conn.commit()
        finally:
            # Release the connection on every path, including when a
            # statement raises (e.g. the table does not exist).
            conn.close()

        # Report results. Every index lands in exactly one of the two
        # lists, so there is no "nothing processed" case to handle.
        if indexes_added:
            print(f"Successfully added {len(indexes_added)} index(es):")
            for idx in indexes_added:
                print(f" - {idx}")

        if indexes_existed:
            print(f"ℹ️ {len(indexes_existed)} index(es) already existed:")
            for idx in indexes_existed:
                print(f" - {idx}")

        return True

    except sqlite3.Error as e:
        print(f"SQLite error: {e}")
        return False
    except Exception as e:
        # Top-level boundary for a CLI migration: report and signal failure.
        print(f"Unexpected error: {e}")
        return False
|
||||
|
||||
|
||||
def main():
    """Run the performance-index migration from the command line.

    The first command line argument, when given, is used as the database
    path. Otherwise the default is ``data/krawl.db`` under the parent of
    the directory containing this script. Exits with status 0 on success
    and 1 on failure.
    """
    # Resolve __file__ to an absolute path first. It may be relative (for
    # example on Python versions before 3.9 when the script is run by
    # path), which would make the default depend on the working directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    default_db_path = os.path.join(
        os.path.dirname(script_dir),
        "data",
        "krawl.db",
    )

    # Allow custom path as command line argument
    db_path = sys.argv[1] if len(sys.argv) > 1 else default_db_path

    print(f"Adding performance indexes to database: {db_path}")
    print("=" * 60)

    success = add_performance_indexes(db_path)

    print("=" * 60)
    if success:
        print("Migration completed successfully")
        print("\n💡 Performance tip: Run 'VACUUM' and 'ANALYZE' on your database")
        print(" to optimize query planner statistics after adding indexes.")
        sys.exit(0)
    else:
        print("Migration failed")
        sys.exit(1)
|
||||
|
||||
|
||||
# Entry point: run the migration only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
95
src/migrations/add_raw_request_column.py
Normal file
95
src/migrations/add_raw_request_column.py
Normal file
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Migration script to add raw_request column to access_logs table.
|
||||
This script is safe to run multiple times - it checks if the column exists before adding it.
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def column_exists(cursor, table_name: str, column_name: str) -> bool:
    """Tell whether *table_name* has a column named *column_name*.

    Args:
        cursor: Open SQLite cursor used to run ``PRAGMA table_info``.
        table_name: Table to inspect; interpolated into the PRAGMA text as-is.
        column_name: Column name to search for.

    Returns:
        True if any row returned by the PRAGMA carries *column_name* in its
        second field, False otherwise.
    """
    cursor.execute(f"PRAGMA table_info({table_name})")
    for table_info_row in cursor.fetchall():
        # Field 1 of each table_info row is compared against the requested name.
        if table_info_row[1] == column_name:
            return True
    return False
|
||||
|
||||
|
||||
def add_raw_request_column(db_path: str) -> bool:
    """
    Add raw_request column to access_logs table if it doesn't exist.

    The column is added as ``TEXT`` and only when ``column_exists`` reports
    it missing, so running the function again on a migrated database is a
    no-op that still returns True.

    Args:
        db_path: Path to the SQLite database file

    Returns:
        True if column was added or already exists, False on error
        (missing database file, SQLite error, or any other exception)
    """
    try:
        # Check up front: connecting to a missing path would create a new,
        # empty database file instead of failing.
        if not os.path.exists(db_path):
            print(f"Database file not found: {db_path}")
            return False

        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()

            # Nothing to do when the column is already present.
            if column_exists(cursor, "access_logs", "raw_request"):
                print("Column 'raw_request' already exists in access_logs table")
                return True

            print("Adding 'raw_request' column to access_logs table...")
            cursor.execute("""
                ALTER TABLE access_logs
                ADD COLUMN raw_request TEXT
            """)
            conn.commit()
        finally:
            # Release the connection on every path, including when the
            # ALTER TABLE statement raises.
            conn.close()

        print("✅ Successfully added 'raw_request' column to access_logs table")
        return True

    except sqlite3.Error as e:
        print(f"SQLite error: {e}")
        return False
    except Exception as e:
        # Top-level boundary for a CLI migration: report and signal failure.
        print(f"Unexpected error: {e}")
        return False
|
||||
|
||||
|
||||
def main():
    """Run the raw_request column migration from the command line.

    The first command line argument, when given, is used as the database
    path. Otherwise the default is ``data/krawl.db`` under the parent of
    the directory containing this script. Exits with status 0 on success
    and 1 on failure.
    """
    # Resolve __file__ to an absolute path first. It may be relative (for
    # example on Python versions before 3.9 when the script is run by
    # path), which would make the default depend on the working directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    default_db_path = os.path.join(
        os.path.dirname(script_dir),
        "data",
        "krawl.db",
    )

    # Allow custom path as command line argument
    db_path = sys.argv[1] if len(sys.argv) > 1 else default_db_path

    print(f"🔄 Running migration on database: {db_path}")
    print("=" * 60)

    success = add_raw_request_column(db_path)

    print("=" * 60)
    if success:
        print("Migration completed successfully")
        sys.exit(0)
    else:
        print("Migration failed")
        sys.exit(1)
|
||||
|
||||
|
||||
# Entry point: run the migration only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user