This commit is contained in:
Yuvi9587
2025-07-30 21:31:02 -07:00
parent e7a6a91542
commit 9e996bf682
2 changed files with 157 additions and 151 deletions

View File

@@ -1,7 +1,7 @@
# --- Standard Library Imports --- # --- Standard Library Imports ---
# --- Standard Library Imports ---
import os import os
import time import time
import hashlib
import http.client import http.client
import traceback import traceback
import threading import threading
@@ -10,27 +10,49 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
# --- Third-Party Library Imports --- # --- Third-Party Library Imports ---
import requests import requests
MULTIPART_DOWNLOADER_AVAILABLE = True
# --- Module Constants --- # --- Module Constants ---
CHUNK_DOWNLOAD_RETRY_DELAY = 2 CHUNK_DOWNLOAD_RETRY_DELAY = 2
MAX_CHUNK_DOWNLOAD_RETRIES = 1 MAX_CHUNK_DOWNLOAD_RETRIES = 1
DOWNLOAD_CHUNK_SIZE_ITER = 1024 * 256 # 256 KB per iteration chunk DOWNLOAD_CHUNK_SIZE_ITER = 1024 * 256 # 256 KB per iteration chunk
# Flag to indicate if this module and its dependencies are available.
MULTIPART_DOWNLOADER_AVAILABLE = True
def _download_individual_chunk( def _download_individual_chunk(
chunk_url, temp_file_path, start_byte, end_byte, headers, chunk_url, chunk_temp_file_path, start_byte, end_byte, headers,
part_num, total_parts, progress_data, cancellation_event, part_num, total_parts, progress_data, cancellation_event,
skip_event, pause_event, global_emit_time_ref, cookies_for_chunk, skip_event, pause_event, global_emit_time_ref, cookies_for_chunk,
logger_func, emitter=None, api_original_filename=None logger_func, emitter=None, api_original_filename=None
): ):
""" """
Downloads a single segment (chunk) of a larger file. This function is Downloads a single segment (chunk) of a larger file to its own unique part file.
intended to be run in a separate thread by a ThreadPoolExecutor. This function is intended to be run in a separate thread by a ThreadPoolExecutor.
It handles retries, pauses, and cancellations for its specific chunk. It handles retries, pauses, and cancellations for its specific chunk. If a
download fails, the partial chunk file is removed, allowing a clean retry later.
Args:
chunk_url (str): The URL to download the file from.
chunk_temp_file_path (str): The unique path to save this specific chunk
(e.g., 'my_video.mp4.part0').
start_byte (int): The starting byte for the Range header.
end_byte (int): The ending byte for the Range header.
headers (dict): The HTTP headers to use for the request.
part_num (int): The index of this chunk (e.g., 0 for the first part).
total_parts (int): The total number of chunks for the entire file.
progress_data (dict): A thread-safe dictionary for sharing progress.
cancellation_event (threading.Event): Event to signal cancellation.
skip_event (threading.Event): Event to signal skipping the file.
pause_event (threading.Event): Event to signal pausing the download.
global_emit_time_ref (list): A mutable list with one element (a timestamp)
to rate-limit UI updates.
cookies_for_chunk (dict): Cookies to use for the request.
logger_func (function): A function to log messages.
emitter (queue.Queue or QObject): Emitter for sending progress to the UI.
api_original_filename (str): The original filename for UI display.
Returns:
tuple: A tuple containing (bytes_downloaded, success_flag).
""" """
# --- Pre-download checks for control events --- # --- Pre-download checks for control events ---
if cancellation_event and cancellation_event.is_set(): if cancellation_event and cancellation_event.is_set():
@@ -48,11 +70,9 @@ def _download_individual_chunk(
time.sleep(0.2) time.sleep(0.2)
logger_func(f" [Chunk {part_num + 1}/{total_parts}] Download resumed.") logger_func(f" [Chunk {part_num + 1}/{total_parts}] Download resumed.")
# --- START: FIX ---
# Set this chunk's status to 'active' before starting the download. # Set this chunk's status to 'active' before starting the download.
with progress_data['lock']: with progress_data['lock']:
progress_data['chunks_status'][part_num]['active'] = True progress_data['chunks_status'][part_num]['active'] = True
# --- END: FIX ---
try: try:
# Prepare headers for the specific byte range of this chunk # Prepare headers for the specific byte range of this chunk
@@ -82,8 +102,9 @@ def _download_individual_chunk(
response.raise_for_status() response.raise_for_status()
# --- Data Writing Loop --- # --- Data Writing Loop ---
with open(temp_file_path, 'r+b') as f: # We open the unique chunk file in write-binary ('wb') mode.
f.seek(start_byte) # No more seeking is required.
with open(chunk_temp_file_path, 'wb') as f:
for data_segment in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE_ITER): for data_segment in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE_ITER):
if cancellation_event and cancellation_event.is_set(): if cancellation_event and cancellation_event.is_set():
return bytes_this_chunk, False return bytes_this_chunk, False
@@ -123,6 +144,7 @@ def _download_individual_chunk(
elif hasattr(emitter, 'file_progress_signal'): elif hasattr(emitter, 'file_progress_signal'):
emitter.file_progress_signal.emit(api_original_filename, status_list_copy) emitter.file_progress_signal.emit(api_original_filename, status_list_copy)
# If we get here, the download for this chunk is successful
return bytes_this_chunk, True return bytes_this_chunk, True
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e: except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e:
@@ -134,8 +156,10 @@ def _download_individual_chunk(
logger_func(f" ❌ [Chunk {part_num + 1}/{total_parts}] Unexpected error: {e}\n{traceback.format_exc(limit=1)}") logger_func(f" ❌ [Chunk {part_num + 1}/{total_parts}] Unexpected error: {e}\n{traceback.format_exc(limit=1)}")
return bytes_this_chunk, False return bytes_this_chunk, False
# If the retry loop finishes without a successful download
return bytes_this_chunk, False return bytes_this_chunk, False
finally: finally:
# This block runs whether the download succeeded or failed
with progress_data['lock']: with progress_data['lock']:
progress_data['chunks_status'][part_num]['active'] = False progress_data['chunks_status'][part_num]['active'] = False
progress_data['chunks_status'][part_num]['speed_bps'] = 0.0 progress_data['chunks_status'][part_num]['speed_bps'] = 0.0
@@ -144,17 +168,37 @@ def _download_individual_chunk(
def download_file_in_parts(file_url, save_path, total_size, num_parts, headers, api_original_filename, def download_file_in_parts(file_url, save_path, total_size, num_parts, headers, api_original_filename,
emitter_for_multipart, cookies_for_chunk_session, emitter_for_multipart, cookies_for_chunk_session,
cancellation_event, skip_event, logger_func, pause_event): cancellation_event, skip_event, logger_func, pause_event):
logger_func(f"⬇️ Initializing Multi-part Download ({num_parts} parts) for: '{api_original_filename}' (Size: {total_size / (1024*1024):.2f} MB)") """
temp_file_path = save_path + ".part" Manages a resilient, multipart file download by saving each chunk to a separate file.
try: This function orchestrates the download process by:
with open(temp_file_path, 'wb') as f_temp: 1. Checking for already completed chunk files to resume a previous download.
if total_size > 0: 2. Submitting only the missing chunks to a thread pool for parallel download.
f_temp.truncate(total_size) 3. Assembling the final file from the individual chunks upon successful completion.
except IOError as e: 4. Cleaning up temporary chunk files after assembly.
logger_func(f" ❌ Error creating/truncating temp file '{temp_file_path}': {e}") 5. Leaving completed chunks on disk if the download fails, allowing for a future resume.
return False, 0, None, None
Args:
file_url (str): The URL of the file to download.
save_path (str): The final desired path for the downloaded file (e.g., 'my_video.mp4').
total_size (int): The total size of the file in bytes.
num_parts (int): The number of parts to split the download into.
headers (dict): HTTP headers for the download requests.
api_original_filename (str): The original filename for UI progress display.
emitter_for_multipart (queue.Queue or QObject): Emitter for UI signals.
cookies_for_chunk_session (dict): Cookies for the download requests.
cancellation_event (threading.Event): Event to signal cancellation.
skip_event (threading.Event): Event to signal skipping the file.
logger_func (function): A function for logging messages.
pause_event (threading.Event): Event to signal pausing the download.
Returns:
tuple: A tuple containing (success_flag, total_bytes_downloaded, md5_hash, file_handle).
The file_handle will be for the final assembled file if successful, otherwise None.
"""
logger_func(f"⬇️ Initializing Resumable Multi-part Download ({num_parts} parts) for: '{api_original_filename}' (Size: {total_size / (1024*1024):.2f} MB)")
# Calculate the byte range for each chunk
chunk_size_calc = total_size // num_parts chunk_size_calc = total_size // num_parts
chunks_ranges = [] chunks_ranges = []
for i in range(num_parts): for i in range(num_parts):
@@ -162,76 +206,119 @@ def download_file_in_parts(file_url, save_path, total_size, num_parts, headers,
end = start + chunk_size_calc - 1 if i < num_parts - 1 else total_size - 1 end = start + chunk_size_calc - 1 if i < num_parts - 1 else total_size - 1
if start <= end: if start <= end:
chunks_ranges.append((start, end)) chunks_ranges.append((start, end))
elif total_size == 0 and i == 0: elif total_size == 0 and i == 0: # Handle zero-byte files
chunks_ranges.append((0, -1)) chunks_ranges.append((0, -1))
# Calculate the expected size of each chunk
chunk_actual_sizes = [] chunk_actual_sizes = []
for start, end in chunks_ranges: for start, end in chunks_ranges:
if end == -1 and start == 0: chunk_actual_sizes.append(end - start + 1 if end != -1 else 0)
chunk_actual_sizes.append(0)
else:
chunk_actual_sizes.append(end - start + 1)
if not chunks_ranges and total_size > 0: if not chunks_ranges and total_size > 0:
logger_func(f" ⚠️ No valid chunk ranges for multipart download of '{api_original_filename}'. Aborting multipart.") logger_func(f" ⚠️ No valid chunk ranges for multipart download of '{api_original_filename}'. Aborting.")
if os.path.exists(temp_file_path): os.remove(temp_file_path)
return False, 0, None, None return False, 0, None, None
# --- Resumption Logic: Check for existing complete chunks ---
chunks_to_download = []
total_bytes_resumed = 0
for i, (start, end) in enumerate(chunks_ranges):
chunk_part_path = f"{save_path}.part{i}"
expected_chunk_size = chunk_actual_sizes[i]
if os.path.exists(chunk_part_path) and os.path.getsize(chunk_part_path) == expected_chunk_size:
logger_func(f" [Chunk {i + 1}/{num_parts}] Resuming with existing complete chunk file.")
total_bytes_resumed += expected_chunk_size
else:
chunks_to_download.append({'index': i, 'start': start, 'end': end})
# Setup the shared progress data structure
progress_data = { progress_data = {
'total_file_size': total_size, 'total_file_size': total_size,
'total_downloaded_so_far': 0, 'total_downloaded_so_far': total_bytes_resumed,
'chunks_status': [ 'chunks_status': [],
{'id': i, 'downloaded': 0, 'total': chunk_actual_sizes[i] if i < len(chunk_actual_sizes) else 0, 'active': False, 'speed_bps': 0.0}
for i in range(num_parts)
],
'lock': threading.Lock(), 'lock': threading.Lock(),
'last_global_emit_time': [time.time()] 'last_global_emit_time': [time.time()]
} }
for i in range(num_parts):
is_resumed = not any(c['index'] == i for c in chunks_to_download)
progress_data['chunks_status'].append({
'id': i,
'downloaded': chunk_actual_sizes[i] if is_resumed else 0,
'total': chunk_actual_sizes[i],
'active': False,
'speed_bps': 0.0
})
# --- Download Phase ---
chunk_futures = [] chunk_futures = []
all_chunks_successful = True all_chunks_successful = True
total_bytes_from_chunks = 0 total_bytes_from_threads = 0
with ThreadPoolExecutor(max_workers=num_parts, thread_name_prefix=f"MPChunk_{api_original_filename[:10]}_") as chunk_pool: with ThreadPoolExecutor(max_workers=num_parts, thread_name_prefix=f"MPChunk_{api_original_filename[:10]}_") as chunk_pool:
for i, (start, end) in enumerate(chunks_ranges): for chunk_info in chunks_to_download:
if cancellation_event and cancellation_event.is_set(): all_chunks_successful = False; break if cancellation_event and cancellation_event.is_set():
chunk_futures.append(chunk_pool.submit( all_chunks_successful = False
_download_individual_chunk, chunk_url=file_url, temp_file_path=temp_file_path, break
i, start, end = chunk_info['index'], chunk_info['start'], chunk_info['end']
chunk_part_path = f"{save_path}.part{i}"
future = chunk_pool.submit(
_download_individual_chunk,
chunk_url=file_url,
chunk_temp_file_path=chunk_part_path,
start_byte=start, end_byte=end, headers=headers, part_num=i, total_parts=num_parts, start_byte=start, end_byte=end, headers=headers, part_num=i, total_parts=num_parts,
progress_data=progress_data, cancellation_event=cancellation_event, skip_event=skip_event, global_emit_time_ref=progress_data['last_global_emit_time'], progress_data=progress_data, cancellation_event=cancellation_event,
pause_event=pause_event, cookies_for_chunk=cookies_for_chunk_session, logger_func=logger_func, emitter=emitter_for_multipart, skip_event=skip_event, global_emit_time_ref=progress_data['last_global_emit_time'],
pause_event=pause_event, cookies_for_chunk=cookies_for_chunk_session,
logger_func=logger_func, emitter=emitter_for_multipart,
api_original_filename=api_original_filename api_original_filename=api_original_filename
)) )
chunk_futures.append(future)
for future in as_completed(chunk_futures): for future in as_completed(chunk_futures):
if cancellation_event and cancellation_event.is_set(): all_chunks_successful = False; break if cancellation_event and cancellation_event.is_set():
bytes_downloaded_this_chunk, success_this_chunk = future.result()
total_bytes_from_chunks += bytes_downloaded_this_chunk
if not success_this_chunk:
all_chunks_successful = False all_chunks_successful = False
bytes_downloaded, success = future.result()
total_bytes_from_threads += bytes_downloaded
if not success:
all_chunks_successful = False
total_bytes_final = total_bytes_resumed + total_bytes_from_threads
if cancellation_event and cancellation_event.is_set(): if cancellation_event and cancellation_event.is_set():
logger_func(f" Multi-part download for '{api_original_filename}' cancelled by main event.") logger_func(f" Multi-part download for '{api_original_filename}' cancelled by main event.")
all_chunks_successful = False all_chunks_successful = False
if emitter_for_multipart:
with progress_data['lock']:
status_list_copy = [dict(s) for s in progress_data['chunks_status']]
if isinstance(emitter_for_multipart, queue.Queue):
emitter_for_multipart.put({'type': 'file_progress', 'payload': (api_original_filename, status_list_copy)})
elif hasattr(emitter_for_multipart, 'file_progress_signal'):
emitter_for_multipart.file_progress_signal.emit(api_original_filename, status_list_copy)
if all_chunks_successful and (total_bytes_from_chunks == total_size or total_size == 0): # --- Assembly and Cleanup Phase ---
logger_func(f" ✅ Multi-part download successful for '{api_original_filename}'. Total bytes: {total_bytes_from_chunks}") if all_chunks_successful and (total_bytes_final == total_size or total_size == 0):
logger_func(f" ✅ All {num_parts} chunks complete. Assembling final file...")
md5_hasher = hashlib.md5() md5_hasher = hashlib.md5()
with open(temp_file_path, 'rb') as f_hash: try:
for buf in iter(lambda: f_hash.read(4096*10), b''): with open(save_path, 'wb') as final_file:
md5_hasher.update(buf) for i in range(num_parts):
calculated_hash = md5_hasher.hexdigest() chunk_part_path = f"{save_path}.part{i}"
return True, total_bytes_from_chunks, calculated_hash, open(temp_file_path, 'rb') with open(chunk_part_path, 'rb') as chunk_file:
content = chunk_file.read()
final_file.write(content)
md5_hasher.update(content)
calculated_hash = md5_hasher.hexdigest()
logger_func(f" ✅ Assembly successful for '{api_original_filename}'. Total bytes: {total_bytes_final}")
return True, total_bytes_final, calculated_hash, open(save_path, 'rb')
except Exception as e:
logger_func(f" ❌ Critical error during file assembly: {e}. Cleaning up.")
return False, total_bytes_final, None, None
finally:
# Cleanup all individual chunk files after successful assembly
for i in range(num_parts):
chunk_part_path = f"{save_path}.part{i}"
if os.path.exists(chunk_part_path):
try:
os.remove(chunk_part_path)
except OSError as e:
logger_func(f" ⚠️ Failed to remove temp part file '{chunk_part_path}': {e}")
else: else:
logger_func(f" ❌ Multi-part download failed for '{api_original_filename}'. Success: {all_chunks_successful}, Bytes: {total_bytes_from_chunks}/{total_size}. Cleaning up.") # If download failed, we do NOT clean up, allowing for resumption later
if os.path.exists(temp_file_path): logger_func(f" ❌ Multi-part download failed for '{api_original_filename}'. Success: {all_chunks_successful}, Bytes: {total_bytes_final}/{total_size}. Partial chunks saved for future resumption.")
try: os.remove(temp_file_path) return False, total_bytes_final, None, None
except OSError as e: logger_func(f" Failed to remove temp part file '{temp_file_path}': {e}")
return False, total_bytes_from_chunks, None, None

View File

@@ -1,81 +0,0 @@
from PyQt5.QtWidgets import (
    QDialog, QVBoxLayout, QGroupBox, QCheckBox, QHBoxLayout, QLabel,
    QLineEdit, QDialogButtonBox
)
# QIntValidator is defined in QtGui, not QtWidgets; importing it from
# QtWidgets raises ImportError as soon as this module is loaded.
from PyQt5.QtGui import QIntValidator
from PyQt5.QtCore import Qt
class MultipartDialog(QDialog):
    """
    Modal dialog for configuring multipart-download settings.

    The dialog is seeded from an existing settings dict and, when accepted,
    hands back an updated dict via :meth:`get_selected_options`.
    """

    def __init__(self, current_settings, parent=None):
        """
        Build the dialog UI and populate it from *current_settings*.

        Args:
            current_settings (dict): Existing multipart settings used to
                pre-fill the widgets (keys: 'apply_on_videos',
                'apply_on_archives', 'min_size_mb', 'custom_extensions').
            parent (QWidget, optional): Parent widget for the dialog.
        """
        super().__init__(parent)
        self.setWindowTitle("Multipart Download Options")
        self.setMinimumWidth(350)
        self.settings = current_settings

        root = QVBoxLayout(self)
        root.addWidget(self._build_types_group())
        root.addWidget(self._build_size_group())
        root.addWidget(self._build_extensions_group())
        root.addWidget(self._build_buttons())
        self.setLayout(root)

        self._load_initial_settings()

    def _build_types_group(self):
        """Create the group of checkboxes selecting which file types apply."""
        group = QGroupBox("Apply to File Types")
        inner = QVBoxLayout()
        self.videos_checkbox = QCheckBox("Videos")
        self.archives_checkbox = QCheckBox("Archives")
        inner.addWidget(self.videos_checkbox)
        inner.addWidget(self.archives_checkbox)
        group.setLayout(inner)
        return group

    def _build_size_group(self):
        """Create the minimum-file-size row (integer MB input)."""
        group = QGroupBox("Minimum File Size")
        row = QHBoxLayout()
        row.addWidget(QLabel("Apply only if file size is over:"))
        self.min_size_input = QLineEdit()
        # Restrict input to non-negative integers up to 5 digits.
        self.min_size_input.setValidator(QIntValidator(0, 99999))
        self.min_size_input.setFixedWidth(50)
        row.addWidget(self.min_size_input)
        row.addWidget(QLabel("MB"))
        row.addStretch()
        group.setLayout(row)
        return group

    def _build_extensions_group(self):
        """Create the free-form extra-extensions input."""
        group = QGroupBox("Custom File Extensions")
        inner = QVBoxLayout()
        inner.addWidget(QLabel("Apply to these additional extensions (comma-separated):"))
        self.extensions_input = QLineEdit()
        self.extensions_input.setPlaceholderText("e.g., .psd, .blend, .mkv")
        inner.addWidget(self.extensions_input)
        group.setLayout(inner)
        return group

    def _build_buttons(self):
        """Create the Save/Cancel button box wired to accept/reject."""
        buttons = QDialogButtonBox(QDialogButtonBox.Save | QDialogButtonBox.Cancel)
        buttons.accepted.connect(self.accept)
        buttons.rejected.connect(self.reject)
        return buttons

    def _load_initial_settings(self):
        """Populates the dialog with the current settings."""
        self.videos_checkbox.setChecked(self.settings.get('apply_on_videos', False))
        self.archives_checkbox.setChecked(self.settings.get('apply_on_archives', False))
        self.min_size_input.setText(str(self.settings.get('min_size_mb', 200)))
        self.extensions_input.setText(", ".join(self.settings.get('custom_extensions', [])))

    def get_selected_options(self):
        """Returns the configured settings from the dialog."""
        raw_extensions = self.extensions_input.text().strip().lower()
        # Keep only entries that look like real extensions (leading dot).
        extension_set = {
            token.strip()
            for token in raw_extensions.split(',')
            if token.strip().startswith('.')
        }
        size_text = self.min_size_input.text()
        return {
            "enabled": True,  # Implied if dialog is saved
            "apply_on_videos": self.videos_checkbox.isChecked(),
            "apply_on_archives": self.archives_checkbox.isChecked(),
            "min_size_mb": int(size_text) if size_text.isdigit() else 200,
            "custom_extensions": sorted(list(extension_set))
        }