added raw request handling, enhanced attack detection for GET and POST requests, templatized suspicious activity to fetch from wordlists.json, aligned helm to load new wordlist config, added migration scripts from 1.0.0 to new krawl versions, removed old and unused functions, added test scripts

This commit is contained in:
Patrick Di Fazio
2026-02-08 16:02:18 +01:00
parent 594eae7447
commit 771174c6a9
26 changed files with 2312 additions and 867 deletions

60
src/migrations/README.md Normal file
View File

@@ -0,0 +1,60 @@
# Database Migrations
This directory contains database migration scripts for Krawl.
Since the 1.0.0 stable release, we have added features that require database schema changes and performance optimizations. These migration scripts ensure that existing users can upgrade seamlessly, without data loss or downtime.
## Available Migrations
### add_raw_request_column.py
Adds the `raw_request` column to the `access_logs` table to store complete HTTP requests for forensic analysis.
**Usage:**
```bash
# Run with default database path (src/data/krawl.db)
python3 migrations/add_raw_request_column.py
# Run with custom database path
python3 migrations/add_raw_request_column.py /path/to/krawl.db
```
### add_performance_indexes.py
Adds critical performance indexes to the `attack_detections` table for efficient aggregation and filtering with large datasets (100k+ records).
**Indexes Added:**
- `ix_attack_detections_attack_type` - Speeds up GROUP BY on attack_type
- `ix_attack_detections_type_log` - Composite index for attack_type + access_log_id
**Usage:**
```bash
# Run with default database path
python3 migrations/add_performance_indexes.py
# Run with custom database path
python3 migrations/add_performance_indexes.py /path/to/krawl.db
```
**Post-Migration Optimization:**
```bash
# Compact database and update query planner statistics
sqlite3 /path/to/krawl.db "VACUUM; ANALYZE;"
```
## Running Migrations
All migration scripts are designed to be idempotent and safe to run multiple times. They will:
1. Check if the migration is already applied
2. Skip if already applied
3. Apply the migration if needed
4. Report the result
## Creating New Migrations
When creating a new migration:
1. Name the file descriptively: `action_description.py`
2. Make it idempotent (safe to run multiple times)
3. Add checks before making changes
4. Provide clear error messages
5. Support custom database paths via command line
6. Update this README with usage instructions

View File

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
"""
Migration script to add performance indexes to attack_detections table.
This dramatically improves query performance with large datasets (100k+ records).
"""
import sqlite3
import sys
import os
def index_exists(cursor, index_name: str) -> bool:
    """Report whether an index called *index_name* is present.

    Args:
        cursor: Database cursor used to query ``sqlite_master``.
        index_name: Name of the index to look up.

    Returns:
        True if ``sqlite_master`` holds an index row with that name,
        False otherwise.
    """
    lookup_sql = "SELECT name FROM sqlite_master WHERE type='index' AND name=?"
    # The name is bound as a parameter rather than formatted into the SQL.
    cursor.execute(lookup_sql, (index_name,))
    matching_row = cursor.fetchone()
    if matching_row is None:
        return False
    return True
def add_performance_indexes(db_path: str) -> bool:
    """
    Add performance indexes to optimize queries on attack_detections.

    Each index is created only when ``index_exists`` reports it missing, so
    running the migration again leaves existing indexes untouched.

    Args:
        db_path: Path to the SQLite database file

    Returns:
        True if indexes were added or already exist, False on error
        (database file not found, SQLite error, or any other exception)
    """
    # (index name, progress message, DDL statement) for every index to ensure.
    index_specs = (
        (
            # Index 1: attack_type for efficient GROUP BY operations
            "ix_attack_detections_attack_type",
            "Adding index on attack_detections.attack_type...",
            """
                CREATE INDEX ix_attack_detections_attack_type
                ON attack_detections(attack_type)
            """,
        ),
        (
            # Index 2: Composite index for attack_type + access_log_id
            "ix_attack_detections_type_log",
            "Adding composite index on attack_detections(attack_type, access_log_id)...",
            """
                CREATE INDEX ix_attack_detections_type_log
                ON attack_detections(attack_type, access_log_id)
            """,
        ),
    )

    try:
        # Check if database exists (sqlite3.connect would silently create it)
        if not os.path.exists(db_path):
            print(f"Database file not found: {db_path}")
            return False

        indexes_added = []
        indexes_existed = []

        # Connect to database
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            for index_name, progress_message, create_sql in index_specs:
                if index_exists(cursor, index_name):
                    indexes_existed.append(index_name)
                    continue
                print(progress_message)
                cursor.execute(create_sql)
                indexes_added.append(index_name)
            conn.commit()
        finally:
            # Close on every path: previously a failing statement skipped
            # close() and left the connection open.
            conn.close()

        # Report results
        if indexes_added:
            print(f"Successfully added {len(indexes_added)} index(es):")
            for idx in indexes_added:
                print(f" - {idx}")
        if indexes_existed:
            print(f" {len(indexes_existed)} index(es) already existed:")
            for idx in indexes_existed:
                print(f" - {idx}")
        if not indexes_added and not indexes_existed:
            print("No indexes processed")
        return True
    except sqlite3.Error as e:
        print(f"SQLite error: {e}")
        return False
    except Exception as e:
        print(f"Unexpected error: {e}")
        return False
def main():
    """Main migration function.

    Uses the first command line argument as the database path, falling back
    to ``data/krawl.db`` under the parent of this script's directory. Runs
    ``add_performance_indexes`` on it and exits with status 0 on success or
    1 on failure.
    """
    # Default database path. abspath() anchors it to this script's location
    # even when __file__ is a relative path, so the default no longer depends
    # on the current working directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    default_db_path = os.path.join(
        os.path.dirname(script_dir),
        "data",
        "krawl.db",
    )

    # Allow custom path as command line argument
    db_path = sys.argv[1] if len(sys.argv) > 1 else default_db_path

    print(f"Adding performance indexes to database: {db_path}")
    print("=" * 60)
    success = add_performance_indexes(db_path)
    print("=" * 60)

    if success:
        print("Migration completed successfully")
        print("\n💡 Performance tip: Run 'VACUUM' and 'ANALYZE' on your database")
        print(" to optimize query planner statistics after adding indexes.")
        sys.exit(0)
    else:
        print("Migration failed")
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python3
"""
Migration script to add raw_request column to access_logs table.
This script is safe to run multiple times - it checks if the column exists before adding it.
"""
import sqlite3
import sys
import os
from pathlib import Path
def column_exists(cursor, table_name: str, column_name: str) -> bool:
    """Check if a column exists in a table.

    Args:
        cursor: Open SQLite cursor used to run ``PRAGMA table_info``.
        table_name: Name of the table to inspect.
        column_name: Column name to look for (exact, case-sensitive match).

    Returns:
        True if the table reports a column with that name; False otherwise,
        including when the PRAGMA returns no rows for the table.
    """
    # The table name is embedded in the statement text, so quote it as an
    # identifier (doubling any embedded double quotes). This keeps names with
    # spaces or keywords working and prevents the name from injecting SQL.
    quoted_table = '"' + table_name.replace('"', '""') + '"'
    cursor.execute(f"PRAGMA table_info({quoted_table})")
    # Field 1 of every table_info row is the column name.
    return any(row[1] == column_name for row in cursor.fetchall())
def add_raw_request_column(db_path: str) -> bool:
    """
    Add raw_request column to access_logs table if it doesn't exist.

    The column is added as ``raw_request TEXT``. When ``column_exists``
    reports the column is already present, nothing is changed.

    Args:
        db_path: Path to the SQLite database file

    Returns:
        True if column was added or already exists, False on error
        (database file not found, SQLite error, or any other exception)
    """
    try:
        # Check if database exists (sqlite3.connect would silently create it)
        if not os.path.exists(db_path):
            print(f"Database file not found: {db_path}")
            return False

        # Connect to database
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()

            # Check if column already exists
            if column_exists(cursor, "access_logs", "raw_request"):
                print("Column 'raw_request' already exists in access_logs table")
                return True

            # Add the column
            print("Adding 'raw_request' column to access_logs table...")
            cursor.execute("""
                ALTER TABLE access_logs
                ADD COLUMN raw_request TEXT
            """)
            conn.commit()
        finally:
            # Close on every path: previously a failing statement skipped
            # close() and left the connection open.
            conn.close()

        print("✅ Successfully added 'raw_request' column to access_logs table")
        return True
    except sqlite3.Error as e:
        print(f"SQLite error: {e}")
        return False
    except Exception as e:
        print(f"Unexpected error: {e}")
        return False
def main():
    """Main migration function.

    Uses the first command line argument as the database path, falling back
    to ``data/krawl.db`` under the parent of this script's directory. Runs
    ``add_raw_request_column`` on it and exits with status 0 on success or
    1 on failure.
    """
    # Default database path. abspath() anchors it to this script's location
    # even when __file__ is a relative path, so the default no longer depends
    # on the current working directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    default_db_path = os.path.join(
        os.path.dirname(script_dir),
        "data",
        "krawl.db",
    )

    # Allow custom path as command line argument
    db_path = sys.argv[1] if len(sys.argv) > 1 else default_db_path

    print(f"🔄 Running migration on database: {db_path}")
    print("=" * 60)
    success = add_raw_request_column(db_path)
    print("=" * 60)

    if success:
        print("Migration completed successfully")
        sys.exit(0)
    else:
        print("Migration failed")
        sys.exit(1)


if __name__ == "__main__":
    main()