refactor: speed up database queries by reading from IpStats and moving IP filtering, sorting, and pagination into SQL
This commit is contained in:
225
src/database.py
225
src/database.py
@@ -1384,26 +1384,20 @@ class DatabaseManager:
|
||||
"""
|
||||
session = self.session
|
||||
try:
|
||||
# Get server IP to filter it out
|
||||
from config import get_config
|
||||
|
||||
config = get_config()
|
||||
server_ip = config.get_server_ip()
|
||||
|
||||
query = session.query(IpStats.ip, IpStats.total_requests)
|
||||
query = self._public_ip_filter(query, IpStats.ip, server_ip)
|
||||
results = (
|
||||
session.query(AccessLog.ip, func.count(AccessLog.id).label("count"))
|
||||
.group_by(AccessLog.ip)
|
||||
.order_by(func.count(AccessLog.id).desc())
|
||||
query.order_by(IpStats.total_requests.desc())
|
||||
.limit(limit)
|
||||
.all()
|
||||
)
|
||||
|
||||
# Filter out local/private IPs and server IP, then limit results
|
||||
filtered = [
|
||||
(row.ip, row.count)
|
||||
for row in results
|
||||
if is_valid_public_ip(row.ip, server_ip)
|
||||
]
|
||||
return filtered[:limit]
|
||||
return [(row.ip, row.total_requests) for row in results]
|
||||
finally:
|
||||
self.close_session()
|
||||
|
||||
@@ -1470,23 +1464,18 @@ class DatabaseManager:
|
||||
"""
|
||||
session = self.session
|
||||
try:
|
||||
# Get server IP to filter it out
|
||||
from config import get_config
|
||||
|
||||
config = get_config()
|
||||
server_ip = config.get_server_ip()
|
||||
|
||||
logs = (
|
||||
query = (
|
||||
session.query(AccessLog)
|
||||
.filter(AccessLog.is_suspicious == True)
|
||||
.order_by(AccessLog.timestamp.desc())
|
||||
.all()
|
||||
)
|
||||
|
||||
# Filter out local/private IPs and server IP
|
||||
filtered_logs = [
|
||||
log for log in logs if is_valid_public_ip(log.ip, server_ip)
|
||||
]
|
||||
query = self._public_ip_filter(query, AccessLog.ip, server_ip)
|
||||
logs = query.limit(limit).all()
|
||||
|
||||
return [
|
||||
{
|
||||
@@ -1495,7 +1484,7 @@ class DatabaseManager:
|
||||
"user_agent": log.user_agent,
|
||||
"timestamp": log.timestamp.isoformat(),
|
||||
}
|
||||
for log in filtered_logs[:limit]
|
||||
for log in logs
|
||||
]
|
||||
finally:
|
||||
self.close_session()
|
||||
@@ -1600,44 +1589,54 @@ class DatabaseManager:
|
||||
|
||||
offset = (page - 1) * page_size
|
||||
|
||||
# Get honeypot triggers grouped by IP
|
||||
results = (
|
||||
session.query(AccessLog.ip, AccessLog.path)
|
||||
# Count distinct paths per IP using SQL GROUP BY
|
||||
count_col = func.count(distinct(AccessLog.path)).label("path_count")
|
||||
base_query = (
|
||||
session.query(AccessLog.ip, count_col)
|
||||
.filter(AccessLog.is_honeypot_trigger == True)
|
||||
.all()
|
||||
)
|
||||
base_query = self._public_ip_filter(base_query, AccessLog.ip, server_ip)
|
||||
base_query = base_query.group_by(AccessLog.ip)
|
||||
|
||||
# Group paths by IP, filtering out invalid IPs
|
||||
ip_paths: Dict[str, List[str]] = {}
|
||||
for row in results:
|
||||
if not is_valid_public_ip(row.ip, server_ip):
|
||||
continue
|
||||
if row.ip not in ip_paths:
|
||||
ip_paths[row.ip] = []
|
||||
if row.path not in ip_paths[row.ip]:
|
||||
ip_paths[row.ip].append(row.path)
|
||||
|
||||
# Create list and sort
|
||||
honeypot_list = [
|
||||
{"ip": ip, "paths": paths, "count": len(paths)}
|
||||
for ip, paths in ip_paths.items()
|
||||
]
|
||||
# Get total count of distinct honeypot IPs
|
||||
total_honeypots = base_query.count()
|
||||
|
||||
# Apply sorting
|
||||
if sort_by == "count":
|
||||
honeypot_list.sort(
|
||||
key=lambda x: x["count"], reverse=(sort_order == "desc")
|
||||
)
|
||||
else: # sort by ip
|
||||
honeypot_list.sort(
|
||||
key=lambda x: x["ip"], reverse=(sort_order == "desc")
|
||||
)
|
||||
order_expr = count_col.desc() if sort_order == "desc" else count_col.asc()
|
||||
else:
|
||||
order_expr = AccessLog.ip.desc() if sort_order == "desc" else AccessLog.ip.asc()
|
||||
|
||||
total_honeypots = len(honeypot_list)
|
||||
paginated = honeypot_list[offset : offset + page_size]
|
||||
total_pages = (total_honeypots + page_size - 1) // page_size
|
||||
ip_rows = base_query.order_by(order_expr).offset(offset).limit(page_size).all()
|
||||
|
||||
# Fetch distinct paths only for the paginated IPs
|
||||
paginated_ips = [row.ip for row in ip_rows]
|
||||
honeypot_list = []
|
||||
if paginated_ips:
|
||||
path_rows = (
|
||||
session.query(AccessLog.ip, AccessLog.path)
|
||||
.filter(
|
||||
AccessLog.is_honeypot_trigger == True,
|
||||
AccessLog.ip.in_(paginated_ips),
|
||||
)
|
||||
.distinct(AccessLog.ip, AccessLog.path)
|
||||
.all()
|
||||
)
|
||||
ip_paths: Dict[str, List[str]] = {}
|
||||
for row in path_rows:
|
||||
ip_paths.setdefault(row.ip, []).append(row.path)
|
||||
|
||||
# Preserve the order from the sorted query
|
||||
for row in ip_rows:
|
||||
paths = ip_paths.get(row.ip, [])
|
||||
honeypot_list.append(
|
||||
{"ip": row.ip, "paths": paths, "count": row.path_count}
|
||||
)
|
||||
|
||||
total_pages = max(1, (total_honeypots + page_size - 1) // page_size)
|
||||
|
||||
return {
|
||||
"honeypots": paginated,
|
||||
"honeypots": honeypot_list,
|
||||
"pagination": {
|
||||
"page": page,
|
||||
"page_size": page_size,
|
||||
@@ -1736,6 +1735,9 @@ class DatabaseManager:
|
||||
"""
|
||||
Retrieve paginated list of top IP addresses by access count.
|
||||
|
||||
Uses the IpStats table (which already stores total_requests per IP)
|
||||
instead of doing a costly GROUP BY on the large access_logs table.
|
||||
|
||||
Args:
|
||||
page: Page number (1-indexed)
|
||||
page_size: Number of results per page
|
||||
@@ -1754,39 +1756,34 @@ class DatabaseManager:
|
||||
|
||||
offset = (page - 1) * page_size
|
||||
|
||||
results = (
|
||||
session.query(
|
||||
AccessLog.ip,
|
||||
func.count(AccessLog.id).label("count"),
|
||||
IpStats.category,
|
||||
)
|
||||
.outerjoin(IpStats, AccessLog.ip == IpStats.ip)
|
||||
.group_by(AccessLog.ip, IpStats.category)
|
||||
.all()
|
||||
)
|
||||
base_query = session.query(IpStats)
|
||||
base_query = self._public_ip_filter(base_query, IpStats.ip, server_ip)
|
||||
|
||||
# Filter out local/private IPs and server IP, then sort
|
||||
filtered = [
|
||||
{
|
||||
"ip": row.ip,
|
||||
"count": row.count,
|
||||
"category": row.category or "unknown",
|
||||
}
|
||||
for row in results
|
||||
if is_valid_public_ip(row.ip, server_ip)
|
||||
]
|
||||
total_ips = base_query.count()
|
||||
|
||||
if sort_by == "count":
|
||||
filtered.sort(key=lambda x: x["count"], reverse=(sort_order == "desc"))
|
||||
else: # sort by ip
|
||||
filtered.sort(key=lambda x: x["ip"], reverse=(sort_order == "desc"))
|
||||
order_col = IpStats.total_requests
|
||||
else:
|
||||
order_col = IpStats.ip
|
||||
|
||||
total_ips = len(filtered)
|
||||
paginated = filtered[offset : offset + page_size]
|
||||
total_pages = (total_ips + page_size - 1) // page_size
|
||||
if sort_order == "desc":
|
||||
base_query = base_query.order_by(order_col.desc())
|
||||
else:
|
||||
base_query = base_query.order_by(order_col.asc())
|
||||
|
||||
results = base_query.offset(offset).limit(page_size).all()
|
||||
|
||||
total_pages = max(1, (total_ips + page_size - 1) // page_size)
|
||||
|
||||
return {
|
||||
"ips": paginated,
|
||||
"ips": [
|
||||
{
|
||||
"ip": row.ip,
|
||||
"count": row.total_requests,
|
||||
"category": row.category or "unknown",
|
||||
}
|
||||
for row in results
|
||||
],
|
||||
"pagination": {
|
||||
"page": page,
|
||||
"page_size": page_size,
|
||||
@@ -1820,28 +1817,27 @@ class DatabaseManager:
|
||||
try:
|
||||
offset = (page - 1) * page_size
|
||||
|
||||
results = (
|
||||
session.query(AccessLog.path, func.count(AccessLog.id).label("count"))
|
||||
count_col = func.count(AccessLog.id).label("count")
|
||||
|
||||
# Get total number of distinct paths
|
||||
total_paths = session.query(func.count(distinct(AccessLog.path))).scalar() or 0
|
||||
|
||||
# Build query with SQL-level sorting and pagination
|
||||
query = (
|
||||
session.query(AccessLog.path, count_col)
|
||||
.group_by(AccessLog.path)
|
||||
.all()
|
||||
)
|
||||
|
||||
# Create list and sort
|
||||
paths_list = [{"path": row.path, "count": row.count} for row in results]
|
||||
|
||||
if sort_by == "count":
|
||||
paths_list.sort(
|
||||
key=lambda x: x["count"], reverse=(sort_order == "desc")
|
||||
)
|
||||
else: # sort by path
|
||||
paths_list.sort(key=lambda x: x["path"], reverse=(sort_order == "desc"))
|
||||
order_expr = count_col.desc() if sort_order == "desc" else count_col.asc()
|
||||
else:
|
||||
order_expr = AccessLog.path.desc() if sort_order == "desc" else AccessLog.path.asc()
|
||||
|
||||
total_paths = len(paths_list)
|
||||
paginated = paths_list[offset : offset + page_size]
|
||||
total_pages = (total_paths + page_size - 1) // page_size
|
||||
results = query.order_by(order_expr).offset(offset).limit(page_size).all()
|
||||
total_pages = max(1, (total_paths + page_size - 1) // page_size)
|
||||
|
||||
return {
|
||||
"paths": paginated,
|
||||
"paths": [{"path": row.path, "count": row.count} for row in results],
|
||||
"pagination": {
|
||||
"page": page,
|
||||
"page_size": page_size,
|
||||
@@ -1875,33 +1871,40 @@ class DatabaseManager:
|
||||
try:
|
||||
offset = (page - 1) * page_size
|
||||
|
||||
results = (
|
||||
session.query(
|
||||
AccessLog.user_agent, func.count(AccessLog.id).label("count")
|
||||
)
|
||||
.filter(AccessLog.user_agent.isnot(None), AccessLog.user_agent != "")
|
||||
.group_by(AccessLog.user_agent)
|
||||
.all()
|
||||
count_col = func.count(AccessLog.id).label("count")
|
||||
|
||||
base_filter = [AccessLog.user_agent.isnot(None), AccessLog.user_agent != ""]
|
||||
|
||||
# Get total number of distinct user agents
|
||||
total_uas = (
|
||||
session.query(func.count(distinct(AccessLog.user_agent)))
|
||||
.filter(*base_filter)
|
||||
.scalar() or 0
|
||||
)
|
||||
|
||||
# Create list and sort
|
||||
ua_list = [
|
||||
{"user_agent": row.user_agent, "count": row.count} for row in results
|
||||
]
|
||||
# Build query with SQL-level sorting and pagination
|
||||
query = (
|
||||
session.query(AccessLog.user_agent, count_col)
|
||||
.filter(*base_filter)
|
||||
.group_by(AccessLog.user_agent)
|
||||
)
|
||||
|
||||
if sort_by == "count":
|
||||
ua_list.sort(key=lambda x: x["count"], reverse=(sort_order == "desc"))
|
||||
else: # sort by user_agent
|
||||
ua_list.sort(
|
||||
key=lambda x: x["user_agent"], reverse=(sort_order == "desc")
|
||||
order_expr = count_col.desc() if sort_order == "desc" else count_col.asc()
|
||||
else:
|
||||
order_expr = (
|
||||
AccessLog.user_agent.desc() if sort_order == "desc"
|
||||
else AccessLog.user_agent.asc()
|
||||
)
|
||||
|
||||
total_uas = len(ua_list)
|
||||
paginated = ua_list[offset : offset + page_size]
|
||||
total_pages = (total_uas + page_size - 1) // page_size
|
||||
results = query.order_by(order_expr).offset(offset).limit(page_size).all()
|
||||
total_pages = max(1, (total_uas + page_size - 1) // page_size)
|
||||
|
||||
return {
|
||||
"user_agents": paginated,
|
||||
"user_agents": [
|
||||
{"user_agent": row.user_agent, "count": row.count}
|
||||
for row in results
|
||||
],
|
||||
"pagination": {
|
||||
"page": page,
|
||||
"page_size": page_size,
|
||||
|
||||
Reference in New Issue
Block a user