import threading
import time
import os
import json
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed, Future, CancelledError

from .api_client import download_from_api
from .workers import PostProcessorWorker
from ..config.constants import (
    STYLE_DATE_BASED, STYLE_POST_TITLE_GLOBAL_NUMBERING,
    MAX_THREADS
)
from ..utils.file_utils import clean_folder_name


class DownloadManager:
    """
    Manages the entire download lifecycle, acting as a bridge between the UI
    and the backend workers. It handles thread pools, task submission,
    and state management for a download session.
    """
    def __init__(self, progress_queue):
        """
        Initializes the DownloadManager.

        Args:
            progress_queue (queue.Queue): A thread-safe queue for sending
                status updates to the UI.
        """
        self.progress_queue = progress_queue
        self.thread_pool = None
        self.active_futures = []
        self.cancellation_event = threading.Event()
        self.pause_event = threading.Event()
        self.is_running = False

        self.total_posts = 0
        self.processed_posts = 0
        self.total_downloads = 0
        self.total_skips = 0
        self.all_kept_original_filenames = []

        self.creator_profiles_dir = None
        self.current_creator_name_for_profile = None
        self.current_creator_profile_path = None
        self.session_file_path = None

    def _log(self, message):
        """Puts a progress message into the queue for the UI."""
        self.progress_queue.put({'type': 'progress', 'payload': (message,)})
    def start_session(self, config, restore_data=None):
        """
        Starts a new download session based on the provided configuration.
        This is the main entry point called by the UI.

        Args:
            config (dict): A dictionary containing all settings from the UI.
            restore_data (dict, optional): Data from a previous, interrupted session.
        """
        if self.is_running:
            self._log("❌ Cannot start a new session: A session is already in progress.")
            return

        self.session_file_path = config.get('session_file_path')
        creator_profile_data = self._setup_creator_profile(config)

        # Save the settings to the creator profile at the start of the session.
        if self.current_creator_profile_path:
            creator_profile_data['settings'] = config
            creator_profile_data.setdefault('processed_post_ids', [])
            self._save_creator_profile(creator_profile_data)
            self._log(f"✅ Loaded/created profile for '{self.current_creator_name_for_profile}'. Settings saved.")

        self.is_running = True
        self.cancellation_event.clear()
        self.pause_event.clear()
        self.active_futures.clear()
        self.total_posts = 0
        self.processed_posts = 0
        self.total_downloads = 0
        self.total_skips = 0
        self.all_kept_original_filenames = []

        is_single_post = bool(config.get('target_post_id_from_initial_url'))
        use_multithreading = config.get('use_multithreading', True)

        # Only the "Date Based" and "Title + Global Numbering" manga styles require
        # fetching the full post list first. "Custom", "Date + Title", "Original Name",
        # and "Post ID" use the thread pool (streaming).
        sequential_styles = [STYLE_DATE_BASED, STYLE_POST_TITLE_GLOBAL_NUMBERING]

        is_manga_sequential = (
            config.get('manga_mode_active') and
            config.get('manga_filename_style') in sequential_styles
        )

        # If this is not a strictly sequential manga mode, use the pool (fetch-as-we-go).
        should_use_multithreading_for_posts = use_multithreading and not is_single_post and not is_manga_sequential

        if should_use_multithreading_for_posts:
            fetcher_thread = threading.Thread(
                target=self._fetch_and_queue_posts_for_pool,
                args=(config, restore_data, creator_profile_data),
                daemon=True
            )
            fetcher_thread.start()
        else:
            # Single-threaded mode does not use the manager's pool logic.
            self._log("ℹ️ Manager is handing off to a single-threaded worker (Sequential Mode)...")
            # The single-threaded worker manages its own lifecycle and signals,
            # so the manager's role for this session is effectively over.
            self.is_running = False  # Allow another session to start if needed
            self.progress_queue.put({'type': 'handoff_to_single_thread', 'payload': (config,)})
    def _fetch_and_queue_posts_for_pool(self, config, restore_data, creator_profile_data):
        """
        Fetches posts from the API in batches and submits them as tasks to a thread pool.
        This method runs in its own dedicated thread to avoid blocking the UI.
        It provides immediate feedback as soon as the first batch of posts is found.
        """
        try:
            num_workers = min(config.get('num_threads', 4), MAX_THREADS)
            self.thread_pool = ThreadPoolExecutor(max_workers=num_workers, thread_name_prefix='PostWorker_')

            session_processed_ids = set(restore_data.get('processed_post_ids', [])) if restore_data else set()
            profile_processed_ids = set(creator_profile_data.get('processed_post_ids', []))
            processed_ids = session_processed_ids.union(profile_processed_ids)
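            # Illustrative shape of `restore_data` as consumed below (only the two keys
            # read in this method are shown; the values are placeholders):
            #
            #     restore_data = {
            #         'processed_post_ids': ['123', '456'],          # posts already handled
            #         'all_posts_data': [{'id': '789', ...}, ...],   # pre-fetched post list
            #     }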
            if restore_data and 'all_posts_data' in restore_data:
                # Session restore relies on the pre-fetched post list.
                all_posts = restore_data['all_posts_data']
                posts_to_process = [p for p in all_posts if p.get('id') not in processed_ids]
                self.total_posts = len(all_posts)
                self.processed_posts = len(processed_ids)
                self._log(f"🔄 Restoring session. {len(posts_to_process)} posts remaining.")
                self.progress_queue.put({'type': 'overall_progress', 'payload': (self.total_posts, self.processed_posts)})

                if not posts_to_process:
                    self._log("✅ No new posts to process from restored session.")
                    return

                for post_data in posts_to_process:
                    if self.cancellation_event.is_set():
                        break
                    worker_args = self._map_config_to_worker_args(post_data, config)
                    worker = PostProcessorWorker(**worker_args)
                    future = self.thread_pool.submit(worker.process)
                    future.add_done_callback(self._handle_future_result)
                    self.active_futures.append(future)
            else:
                # --- Streaming logic: fetch pages as we go ---
                post_generator = download_from_api(
                    api_url_input=config['api_url'],
                    logger=self._log,
                    start_page=config.get('start_page'),
                    end_page=config.get('end_page'),
                    cancellation_event=self.cancellation_event,
                    pause_event=self.pause_event,
                    cookies_dict=None  # Cookie handling is done inside the API client if needed.
                )
                for post_batch in post_generator:
                    if self.cancellation_event.is_set():
                        break

                    if not post_batch:
                        continue

                    new_posts_batch = [p for p in post_batch if p.get('id') not in processed_ids]

                    if not new_posts_batch:
                        # Skipped counts are already reported by the API client.
                        continue

                    # Update the count dynamically as posts are found. In streaming mode,
                    # total_posts is a running total of discovered posts, not an absolute total.
                    self.total_posts += len(new_posts_batch)

                    for post_data in new_posts_batch:
                        if self.cancellation_event.is_set():
                            break

                        # Map the flat config dict to the arguments expected by
                        # PostProcessorWorker.__init__.
                        worker_args = self._map_config_to_worker_args(post_data, config)
                        worker = PostProcessorWorker(**worker_args)

                        future = self.thread_pool.submit(worker.process)
                        future.add_done_callback(self._handle_future_result)
                        self.active_futures.append(future)

                    # Small sleep to prevent a UI freeze if batches are huge and instant.
                    time.sleep(0.01)
        except Exception as e:
            self._log(f"❌ Critical Error in Fetcher Thread: {e}")
            traceback.print_exc()
        finally:
            # Mark the session as no longer running so it can finish; the main window
            # tracks the remaining active futures, so this thread simply exits.
            self.is_running = False
    def _map_config_to_worker_args(self, post_data, config):
        """Helper to map the flat config dict to PostProcessorWorker arguments."""
        # This mirrors the arguments of PostProcessorWorker.__init__ in workers.py.
        return {
            'post_data': post_data,
            'download_root': config.get('output_dir'),
            'known_names': [],  # Pass KNOWN_NAMES here if needed.
            'filter_character_list': [],  # Parsed filters, if available in config.
            'emitter': self.progress_queue,
            'unwanted_keywords': set(),  # Parse from config if needed.
            'filter_mode': config.get('filter_mode'),
            'skip_zip': config.get('skip_zip'),
            'use_subfolders': config.get('use_subfolders'),
            'use_post_subfolders': config.get('use_post_subfolders'),
            'target_post_id_from_initial_url': config.get('target_post_id_from_initial_url'),
            'custom_folder_name': config.get('custom_folder_name'),
            'compress_images': config.get('compress_images'),
            'download_thumbnails': config.get('download_thumbnails'),
            'service': config.get('service') or 'unknown',  # Extracted elsewhere.
            'user_id': config.get('user_id') or 'unknown',
            'pause_event': self.pause_event,
            'api_url_input': config.get('api_url'),
            'cancellation_event': self.cancellation_event,
            'downloaded_files': None,  # Managed per worker, or globally if passed in.
            'downloaded_file_hashes': None,
            'downloaded_files_lock': None,
            'downloaded_file_hashes_lock': None,
            'manga_mode_active': config.get('manga_mode_active'),
            'manga_filename_style': config.get('manga_filename_style'),
            'manga_custom_filename_format': config.get('custom_manga_filename_format', "{published} {title}"),
            'manga_custom_date_format': config.get('manga_custom_date_format', "YYYY-MM-DD"),
            'use_multithreading': config.get('use_multithreading', True),
            # Add other necessary fields from config here and ensure sensible defaults.
        }
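    # Illustrative minimum `config` dict for the mapping above (keys are the ones this
    # helper actually reads; the values shown are placeholders, and the real UI supplies
    # many more settings):
    #
    #     config = {
    #         'api_url': 'https://example.com/api/v1/creator/posts',
    #         'output_dir': '/downloads/creator',
    #         'use_multithreading': True,
    #         'num_threads': 4,
    #         'filter_mode': None,
    #         'skip_zip': False,
    #         'use_subfolders': True,
    #         'use_post_subfolders': False,
    #         'manga_mode_active': False,
    #         'manga_filename_style': None,
    #     }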
    def _setup_creator_profile(self, config):
        """Prepares the path and loads data for the current creator's profile."""
        # Placeholder: the creator name should be extracted from the URL or from
        # config when available, and the profile loaded from creator_profiles_dir.
        self.current_creator_name_for_profile = "Unknown"
        return {}
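    # Sketch of the profile structure this manager reads and writes (only the two keys
    # used in this module are shown; 'settings' holds the session's config dict):
    #
    #     {
    #         "settings": {"api_url": "...", "use_multithreading": true},
    #         "processed_post_ids": ["123", "456"]
    #     }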
    def _save_creator_profile(self, data):
        """Saves the provided data to the current creator's profile file."""
        if not self.current_creator_profile_path:
            return
        try:
            temp_path = self.current_creator_profile_path + ".tmp"
            with open(temp_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
            os.replace(temp_path, self.current_creator_profile_path)
        except OSError as e:
            self._log(f"❌ Error saving creator profile to '{self.current_creator_profile_path}': {e}")
    def cancel_session(self):
        """Cancels the current running session."""
        if not self.is_running:
            return

        if self.cancellation_event.is_set():
            self._log("ℹ️ Cancellation already in progress.")
            return

        self._log("⚠️ Cancellation requested by user...")
        self.cancellation_event.set()

        if self.thread_pool:
            self.thread_pool.shutdown(wait=False, cancel_futures=True)
    def _handle_future_result(self, future):
        """Callback for when a worker task finishes."""
        if self.active_futures:
            try:
                self.active_futures.remove(future)
            except ValueError:
                pass

        try:
            result = future.result()
            # Result tuple: (download_count, skip_count, kept_original_filenames, ...)
            if result:
                self.total_downloads += result[0]
                self.total_skips += result[1]
                if len(result) > 3 and result[3]:
                    # The worker kept the original filename; nothing extra to do here yet.
                    pass
        except CancelledError:
            pass
        except Exception as e:
            self._log(f"❌ Worker Error: {e}")

        self.processed_posts += 1
        self.progress_queue.put({'type': 'overall_progress', 'payload': (self.total_posts, self.processed_posts)})

        if not self.active_futures and not self.is_running:
            self._log("✅ All tasks completed.")
            self.progress_queue.put({'type': 'worker_finished', 'payload': (self.total_downloads, self.total_skips, [], [])})
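# A rough sketch of a queue consumer on the UI side, matching the message types this
# manager emits ('progress', 'overall_progress', 'worker_finished',
# 'handoff_to_single_thread'). The function name and polling style are illustrative;
# the real application drains the queue from its own event loop:
#
#     def drain_progress_queue(progress_queue):
#         while not progress_queue.empty():
#             msg = progress_queue.get_nowait()
#             if msg['type'] == 'progress':
#                 print(msg['payload'][0])
#             elif msg['type'] == 'overall_progress':
#                 total, processed = msg['payload']
#                 print(f"{processed}/{total} posts processed")
#             elif msg['type'] == 'worker_finished':
#                 downloads, skips, _, _ = msg['payload']
#                 print(f"Done: {downloads} downloaded, {skips} skipped")
#             elif msg['type'] == 'handoff_to_single_thread':
#                 pass  # Start the single-threaded worker with msg['payload'][0]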