Mirror of https://github.com/Yuvi9587/Kemono-Downloader.git (synced 2025-12-29 16:14:44 +00:00)

Commit diff:
@@ -237,6 +237,11 @@ class PostProcessorWorker:
         if self.check_cancel() or (skip_event and skip_event.is_set()):
             return 0, 1, "", False, FILE_DOWNLOAD_STATUS_SKIPPED, None
 
+        file_download_headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
+            'Referer': post_page_url
+        }
+
         file_url = file_info.get('url')
         cookies_to_use_for_file = None
         if self.use_cookie:
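
The hunk above moves the construction of file_download_headers to the top of _download_single_file, so the headers exist before the existence/size check added later in this commit. A minimal sketch of the request pattern, with placeholder URLs (the real code derives both from the post being processed):

    import requests

    post_page_url = "https://example-host/patreon/user/123/post/456"   # placeholder
    file_url = "https://example-host/data/ab/cd/example.jpg"           # placeholder

    headers = {
        # Browser-like User-Agent: some hosts reject default library agents.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) ...',
        # Referer set to the post page, mimicking an in-browser click.
        'Referer': post_page_url,
    }
    with requests.get(file_url, headers=headers, stream=True, timeout=30) as resp:
        resp.raise_for_status()
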
@@ -255,29 +260,23 @@ class PostProcessorWorker:
                 self.logger(f" -> Skip File (Keyword in Original Name '{skip_word}'): '{api_original_filename}'. Scope: {self.skip_words_scope}")
                 return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_SKIPPED, None
 
-        cleaned_original_api_filename = clean_filename(api_original_filename)
+        cleaned_original_api_filename = robust_clean_name(api_original_filename)
         original_filename_cleaned_base, original_ext = os.path.splitext(cleaned_original_api_filename)
         if not original_ext.startswith('.'): original_ext = '.' + original_ext if original_ext else ''
 
         if self.manga_mode_active:
             if self.manga_filename_style == STYLE_ORIGINAL_NAME:
-                # Get the post's publication or added date
                 published_date_str = self.post.get('published')
                 added_date_str = self.post.get('added')
-                formatted_date_str = "nodate" # Fallback if no date is found
+                formatted_date_str = "nodate"
 
                 date_to_use_str = published_date_str or added_date_str
 
                 if date_to_use_str:
                     try:
-                        # Extract just the YYYY-MM-DD part from the timestamp
                         formatted_date_str = date_to_use_str.split('T')[0]
                     except Exception:
                         self.logger(f" ⚠️ Could not parse date '{date_to_use_str}'. Using 'nodate' prefix.")
                 else:
                     self.logger(f" ⚠️ Post ID {original_post_id_for_log} has no date. Using 'nodate' prefix.")
 
-                # Combine the date with the cleaned original filename
                 filename_to_save_in_main_path = f"{formatted_date_str}_{cleaned_original_api_filename}"
                 was_original_name_kept_flag = True
             elif self.manga_filename_style == STYLE_POST_TITLE:
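
For the STYLE_ORIGINAL_NAME manga style, the new code prefixes the cleaned filename with the post's 'published' (or 'added') date, falling back to "nodate". A standalone sketch of that naming rule, assuming ISO-8601 timestamps as in the hunk above (build_dated_filename is an illustrative name, not part of the codebase):

    def build_dated_filename(post: dict, cleaned_name: str) -> str:
        # Prefer 'published', fall back to 'added'; both may be absent.
        date_str = post.get('published') or post.get('added')
        prefix = "nodate"
        if date_str:
            # "2023-05-01T12:34:56" -> "2023-05-01"
            prefix = date_str.split('T')[0]
        return f"{prefix}_{cleaned_name}"

    assert build_dated_filename({'published': '2023-05-01T12:34:56'}, "page.jpg") == "2023-05-01_page.jpg"
    assert build_dated_filename({}, "page.jpg") == "nodate_page.jpg"
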
@@ -303,7 +302,7 @@ class PostProcessorWorker:
                         manga_date_file_counter_ref[0] += 1
                 base_numbered_name = f"{counter_val_for_filename:03d}"
                 if self.manga_date_prefix and self.manga_date_prefix.strip():
-                    cleaned_prefix = clean_filename(self.manga_date_prefix.strip())
+                    cleaned_prefix = robust_clean_name(self.manga_date_prefix.strip())
                     if cleaned_prefix:
                         filename_to_save_in_main_path = f"{cleaned_prefix} {base_numbered_name}{original_ext}"
                     else:
@@ -320,7 +319,7 @@ class PostProcessorWorker:
                 with counter_lock:
                     counter_val_for_filename = manga_global_file_counter_ref[0]
                     manga_global_file_counter_ref[0] += 1
-                cleaned_post_title_base_for_global = clean_filename(post_title.strip() if post_title and post_title.strip() else "post")
+                cleaned_post_title_base_for_global = robust_clean_name(post_title.strip() if post_title and post_title.strip() else "post")
                 filename_to_save_in_main_path = f"{cleaned_post_title_base_for_global}_{counter_val_for_filename:03d}{original_ext}"
             else:
                 self.logger(f"⚠️ Manga Title+GlobalNum Mode: Counter ref not provided or malformed for '{api_original_filename}'. Using original. Ref: {manga_global_file_counter_ref}")
@@ -353,7 +352,7 @@ class PostProcessorWorker:
 
             if post_title and post_title.strip():
                 temp_cleaned_title = robust_clean_name(post_title.strip())
-                if not temp_cleaned_title or temp_cleaned_title.startswith("untitled_file"):
+                if not temp_cleaned_title or temp_cleaned_title.startswith("untitled_folder"):
                     self.logger(f"⚠️ Manga mode (Date+PostTitle Style): Post title for post {original_post_id_for_log} ('{post_title}') was empty or generic after cleaning. Using 'post' as title part.")
                     cleaned_post_title_for_filename = "post"
                 else:
@@ -436,7 +435,6 @@ class PostProcessorWorker:
         final_save_path_check = os.path.join(target_folder_path, filename_to_save_in_main_path)
         if os.path.exists(final_save_path_check):
             try:
-                # Use a HEAD request to get the expected size without downloading the body
                 with requests.head(file_url, headers=file_download_headers, timeout=15, cookies=cookies_to_use_for_file, allow_redirects=True) as head_response:
                     head_response.raise_for_status()
                     expected_size = int(head_response.headers.get('Content-Length', -1))
@@ -445,31 +443,21 @@ class PostProcessorWorker:
 
                 if expected_size != -1 and actual_size == expected_size:
                     self.logger(f" -> Skip (File Exists & Complete): '{filename_to_save_in_main_path}' is already on disk with the correct size.")
 
-                    # We still need to add its hash to the session to prevent duplicates in other modes
-                    # This is a quick hash calculation for the already existing file
                     try:
                         md5_hasher = hashlib.md5()
                         with open(final_save_path_check, 'rb') as f_verify:
                             for chunk in iter(lambda: f_verify.read(8192), b""):
                                 md5_hasher.update(chunk)
 
                         with self.downloaded_hash_counts_lock:
                             self.downloaded_hash_counts[md5_hasher.hexdigest()] += 1
                     except Exception as hash_exc:
                         self.logger(f" ⚠️ Could not hash existing file '{filename_to_save_in_main_path}' for session: {hash_exc}")
 
                     return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
                 else:
                     self.logger(f" ⚠️ File '{filename_to_save_in_main_path}' exists but is incomplete (Expected: {expected_size}, Actual: {actual_size}). Re-downloading.")
 
             except requests.RequestException as e:
                 self.logger(f" ⚠️ Could not verify size of existing file '{filename_to_save_in_main_path}': {e}. Proceeding with download.")
-        file_download_headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
-            'Referer': post_page_url
-        }
-
         retry_delay = 5
         downloaded_size_bytes = 0
         calculated_file_hash = None
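
Together, the two hunks above add a completeness check for files already on disk: a HEAD request fetches the expected Content-Length, and when the local size matches, the file's MD5 is still folded into the session's duplicate tracker before the download is skipped. A condensed sketch of the same idea (function names are illustrative, not the worker's API):

    import hashlib
    import os
    import requests

    def existing_file_is_complete(path: str, url: str, headers: dict) -> bool:
        # HEAD fetches only the headers, so no body is downloaded.
        with requests.head(url, headers=headers, timeout=15, allow_redirects=True) as r:
            r.raise_for_status()
            expected = int(r.headers.get('Content-Length', -1))
        # An unknown remote size (-1) cannot prove completeness.
        return expected != -1 and os.path.getsize(path) == expected

    def md5_of_file(path: str) -> str:
        h = hashlib.md5()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(8192), b""):
                h.update(chunk)
        return h.hexdigest()
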
@@ -494,7 +482,6 @@ class PostProcessorWorker:
 
                 response.raise_for_status()
                 total_size_bytes = int(response.headers.get('Content-Length', 0))
-                # Use the dedicated parts count from the dialog, not the main thread count
                 num_parts_for_file = min(self.multipart_parts_count, MAX_PARTS_FOR_MULTIPART_DOWNLOAD)
 
                 file_is_eligible_by_scope = False
@@ -532,7 +519,7 @@ class PostProcessorWorker:
                         download_successful_flag = True
                         downloaded_size_bytes = mp_bytes
                         calculated_file_hash = mp_hash
-                        downloaded_part_file_path = mp_save_path_for_unique_part_stem_arg + ".part"
+                        downloaded_part_file_path = mp_save_path_for_unique_part_stem_arg
                         if mp_file_handle: mp_file_handle.close()
                         break
                     else:
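
The one-line change above stops appending a second ".part" suffix. It assumes, consistent with the rename logic later in this file, that the multipart helper's save-path argument already names the on-disk .part file (values below are hypothetical):

    mp_stem = "downloads/image_001.jpg.part"  # assumption: helper already carries ".part"
    old_path = mp_stem + ".part"              # "image_001.jpg.part.part" -- never exists on disk
    new_path = mp_stem                        # "image_001.jpg.part" -- matches the real file
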
@@ -683,26 +670,22 @@ class PostProcessorWorker:
             self.logger(f" 🔄 Compressing '{api_original_filename}' to WebP...")
             try:
                 with Image.open(downloaded_part_file_path) as img:
-                    # Convert to RGB to avoid issues with paletted images or alpha channels in WebP
                     if img.mode not in ('RGB', 'RGBA'):
                         img = img.convert('RGBA')
 
-                    # Use an in-memory buffer to save the compressed image
                     output_buffer = BytesIO()
                     img.save(output_buffer, format='WebP', quality=85)
 
-                    # This buffer now holds the compressed data
                     data_to_write_io = output_buffer
 
-                    # Update the filename to use the .webp extension
                     base, _ = os.path.splitext(filename_to_save_in_main_path)
                     filename_to_save_in_main_path = f"{base}.webp"
                     self.logger(f" ✅ Compression successful. New size: {len(data_to_write_io.getvalue()) / (1024*1024):.2f} MB")
 
             except Exception as e_compress:
                 self.logger(f" ⚠️ Failed to compress '{api_original_filename}': {e_compress}. Saving original file instead.")
-                data_to_write_io = None # Ensure we fall back to saving the original
+                data_to_write_io = None
 
         effective_save_folder = target_folder_path
         base_name, extension = os.path.splitext(filename_to_save_in_main_path)
         counter = 1
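
The compression hunk above converts a downloaded image to WebP entirely in memory via Pillow, and falls back to the original bytes on any failure. A self-contained sketch of the same pipeline, using quality 85 as in the diff (compress_to_webp is an illustrative name):

    from io import BytesIO
    from typing import Optional

    from PIL import Image

    def compress_to_webp(src_path: str) -> Optional[BytesIO]:
        # Returns WebP data in memory, or None to signal "keep the original file".
        try:
            with Image.open(src_path) as img:
                if img.mode not in ('RGB', 'RGBA'):
                    # RGBA preserves transparency from paletted/LA sources.
                    img = img.convert('RGBA')
                buf = BytesIO()
                img.save(buf, format='WebP', quality=85)
                return buf
        except Exception:
            return None  # caller writes the untouched source file instead
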
@@ -719,17 +702,14 @@ class PostProcessorWorker:
 
         try:
             if data_to_write_io:
-                # Write the compressed data from the in-memory buffer
                 with open(final_save_path, 'wb') as f_out:
                     f_out.write(data_to_write_io.getvalue())
-                # Clean up the original downloaded part file
                 if downloaded_part_file_path and os.path.exists(downloaded_part_file_path):
                     try:
                         os.remove(downloaded_part_file_path)
                     except OSError as e_rem:
                         self.logger(f" -> Failed to remove .part after compression: {e_rem}")
             else:
-                # No compression was done, just rename the original file
                 if downloaded_part_file_path and os.path.exists(downloaded_part_file_path):
                     time.sleep(0.1)
                     os.rename(downloaded_part_file_path, final_save_path)
@@ -4555,44 +4555,51 @@ class DownloaderApp (QWidget ):
         self .active_retry_futures_map [future ]=job_details
         self .active_retry_futures .append (future )
 
-    def _execute_single_file_retry (self ,job_details ,common_args ):
-        """Executes a single file download retry attempt."""
-        dummy_post_data ={'id':job_details ['original_post_id_for_log'],'title':job_details ['post_title']}
-
-        ppw_init_args ={
-        **common_args ,
-        'post_data':dummy_post_data ,
-        'service':job_details .get ('service','unknown_service'),
-        'user_id':job_details .get ('user_id','unknown_user'),
-        'api_url_input':job_details .get ('api_url_input',''),
-        'manga_mode_active':job_details .get ('manga_mode_active_for_file',False ),
-        'manga_filename_style':job_details .get ('manga_filename_style_for_file',STYLE_POST_TITLE ),
-        'scan_content_for_images':common_args .get ('scan_content_for_images',False ),
-        'use_cookie':common_args .get ('use_cookie',False ),
-        'cookie_text':common_args .get ('cookie_text',""),
-        'selected_cookie_file':common_args .get ('selected_cookie_file',None ),
-        'app_base_dir':common_args .get ('app_base_dir',None ),
-        }
-        worker =PostProcessorWorker (**ppw_init_args )
-
-        dl_count ,skip_count ,filename_saved ,original_kept ,status ,_ =worker ._download_single_file (
-        file_info =job_details ['file_info'],
-        target_folder_path =job_details ['target_folder_path'],
-        headers =job_details ['headers'],
-        original_post_id_for_log =job_details ['original_post_id_for_log'],
-        skip_event =None ,
-        post_title =job_details ['post_title'],
-        file_index_in_post =job_details ['file_index_in_post'],
-        num_files_in_this_post =job_details ['num_files_in_this_post'],
-        forced_filename_override =job_details .get ('forced_filename_override')
-        )
-
-        is_successful_download =(status ==FILE_DOWNLOAD_STATUS_SUCCESS )
-        is_resolved_as_skipped =(status ==FILE_DOWNLOAD_STATUS_SKIPPED )
-
-        return is_successful_download or is_resolved_as_skipped
+    def _execute_single_file_retry(self, job_details, common_args):
+        """
+        Executes a single file download retry attempt. This function is called by the retry thread pool.
+        """
+        # This worker is temporary and only for this retry task.
+        # It needs dummy post data to initialize.
+        dummy_post_data = {'id': job_details['original_post_id_for_log'], 'title': job_details['post_title']}
+
+        # Reconstruct the post_page_url, which is needed by the download function
+        service = job_details.get('service', 'unknown_service')
+        user_id = job_details.get('user_id', 'unknown_user')
+        post_id = job_details.get('original_post_id_for_log', 'unknown_id')
+        api_url_input = job_details.get('api_url_input', '')
+        parsed_api_url = urlparse(api_url_input)
+        api_domain = parsed_api_url.netloc if parsed_api_url.netloc else self._get_domain_for_service(service)
+        post_page_url = f"https://{api_domain}/{service}/user/{user_id}/post/{post_id}"
+
+        # Prepare all arguments for the PostProcessorWorker
+        ppw_init_args = {
+            **common_args,
+            'post_data': dummy_post_data,
+            'service': service,
+            'user_id': user_id,
+            'api_url_input': api_url_input
+        }
+
+        worker = PostProcessorWorker(**ppw_init_args)
+
+        # Call the download method with the corrected arguments
+        dl_count, skip_count, filename_saved, original_kept, status, _ = worker._download_single_file(
+            file_info=job_details['file_info'],
+            target_folder_path=job_details['target_folder_path'],
+            post_page_url=post_page_url,  # Using the correct argument
+            original_post_id_for_log=job_details['original_post_id_for_log'],
+            skip_event=None,
+            post_title=job_details['post_title'],
+            file_index_in_post=job_details['file_index_in_post'],
+            num_files_in_this_post=job_details['num_files_in_this_post'],
+            forced_filename_override=job_details.get('forced_filename_override')
+        )
+
+        is_successful_download = (status == FILE_DOWNLOAD_STATUS_SUCCESS)
+        is_resolved_as_skipped = (status == FILE_DOWNLOAD_STATUS_SKIPPED)
+
+        return is_successful_download or is_resolved_as_skipped
 
     def _handle_retry_future_result (self ,future ):
         self .processed_retry_count +=1
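
The rewritten retry helper above rebuilds post_page_url because _download_single_file now takes it (it feeds the Referer header introduced at the top of this commit). A sketch of just the URL-rebuilding step; _get_domain_for_service comes from the diff, while the function name and fallback domain below are illustrative:

    from urllib.parse import urlparse

    def rebuild_post_page_url(api_url_input: str, service: str, user_id: str,
                              post_id: str, fallback_domain: str) -> str:
        # Prefer the domain the user actually entered; fall back if unparsable.
        netloc = urlparse(api_url_input).netloc or fallback_domain
        return f"https://{netloc}/{service}/user/{user_id}/post/{post_id}"

    # rebuild_post_page_url("https://example-host/api/v1/...", "patreon", "123", "456", "example-host")
    #   -> "https://example-host/patreon/user/123/post/456"
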