mirror of
https://github.com/Yuvi9587/Kemono-Downloader.git
synced 2025-12-29 16:14:44 +00:00
Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bf111d109a | ||
|
|
00f8ff63d6 | ||
|
|
aee0ff999d | ||
|
|
b5e9080285 | ||
|
|
25d33f1531 | ||
|
|
ff0ccb2631 | ||
|
|
da507b2b3a | ||
|
|
9165903e96 | ||
|
|
f85de58fcb |
14
Known.txt
14
Known.txt
@@ -1,14 +0,0 @@
|
|||||||
Hanabi intrusive
|
|
||||||
Hanzo
|
|
||||||
Hinata
|
|
||||||
Jett
|
|
||||||
Makima
|
|
||||||
Rangiku - Page
|
|
||||||
Reyna
|
|
||||||
Sage
|
|
||||||
Yor
|
|
||||||
Yoruichi
|
|
||||||
killjoy
|
|
||||||
neon
|
|
||||||
power
|
|
||||||
viper
|
|
||||||
|
|||||||
@@ -18,6 +18,13 @@ except ImportError:
|
|||||||
print("ERROR: Pillow library not found. Please install it: pip install Pillow")
|
print("ERROR: Pillow library not found. Please install it: pip install Pillow")
|
||||||
Image = None
|
Image = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
from multipart_downloader import download_file_in_parts
|
||||||
|
MULTIPART_DOWNLOADER_AVAILABLE = True
|
||||||
|
except ImportError as e:
|
||||||
|
print(f"Warning: multipart_downloader.py not found or import error: {e}. Multi-part downloads will be disabled.")
|
||||||
|
MULTIPART_DOWNLOADER_AVAILABLE = False
|
||||||
|
def download_file_in_parts(*args, **kwargs): return False, 0, None, None # Dummy function
|
||||||
|
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
@@ -31,10 +38,14 @@ SKIP_SCOPE_BOTH = "both"
|
|||||||
CHAR_SCOPE_TITLE = "title"
|
CHAR_SCOPE_TITLE = "title"
|
||||||
CHAR_SCOPE_FILES = "files"
|
CHAR_SCOPE_FILES = "files"
|
||||||
CHAR_SCOPE_BOTH = "both"
|
CHAR_SCOPE_BOTH = "both"
|
||||||
|
CHAR_SCOPE_COMMENTS = "comments"
|
||||||
|
|
||||||
fastapi_app = None
|
fastapi_app = None
|
||||||
KNOWN_NAMES = []
|
KNOWN_NAMES = []
|
||||||
|
|
||||||
|
MIN_SIZE_FOR_MULTIPART_DOWNLOAD = 10 * 1024 * 1024 # 10 MB - Stays the same
|
||||||
|
MAX_PARTS_FOR_MULTIPART_DOWNLOAD = 15 # Max concurrent connections for a single file
|
||||||
|
|
||||||
IMAGE_EXTENSIONS = {
|
IMAGE_EXTENSIONS = {
|
||||||
'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.webp',
|
'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.webp',
|
||||||
'.heic', '.heif', '.svg', '.ico', '.jfif', '.pjpeg', '.pjp', '.avif'
|
'.heic', '.heif', '.svg', '.ico', '.jfif', '.pjpeg', '.pjp', '.avif'
|
||||||
@@ -50,20 +61,31 @@ ARCHIVE_EXTENSIONS = {
|
|||||||
def is_title_match_for_character(post_title, character_name_filter):
|
def is_title_match_for_character(post_title, character_name_filter):
|
||||||
if not post_title or not character_name_filter:
|
if not post_title or not character_name_filter:
|
||||||
return False
|
return False
|
||||||
pattern = r"(?i)\b" + re.escape(character_name_filter) + r"\b"
|
safe_filter = str(character_name_filter).strip()
|
||||||
return bool(re.search(pattern, post_title))
|
if not safe_filter:
|
||||||
|
return False
|
||||||
|
|
||||||
|
pattern = r"(?i)\b" + re.escape(safe_filter) + r"\b"
|
||||||
|
match_result = bool(re.search(pattern, post_title))
|
||||||
|
return match_result
|
||||||
|
|
||||||
def is_filename_match_for_character(filename, character_name_filter):
|
def is_filename_match_for_character(filename, character_name_filter):
|
||||||
if not filename or not character_name_filter:
|
if not filename or not character_name_filter:
|
||||||
return False
|
return False
|
||||||
return character_name_filter.lower() in filename.lower()
|
|
||||||
|
safe_filter = str(character_name_filter).strip().lower()
|
||||||
|
if not safe_filter:
|
||||||
|
return False
|
||||||
|
|
||||||
|
match_result = safe_filter in filename.lower()
|
||||||
|
return match_result
|
||||||
|
|
||||||
|
|
||||||
def clean_folder_name(name):
|
def clean_folder_name(name):
|
||||||
if not isinstance(name, str): name = str(name)
|
if not isinstance(name, str): name = str(name)
|
||||||
cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name)
|
cleaned = re.sub(r'[^\w\s\-\_\.\(\)]', '', name)
|
||||||
cleaned = cleaned.strip()
|
cleaned = cleaned.strip()
|
||||||
cleaned = re.sub(r'\s+', '_', cleaned)
|
cleaned = re.sub(r'\s+', ' ', cleaned)
|
||||||
return cleaned if cleaned else "untitled_folder"
|
return cleaned if cleaned else "untitled_folder"
|
||||||
|
|
||||||
|
|
||||||
@@ -74,6 +96,15 @@ def clean_filename(name):
|
|||||||
cleaned = re.sub(r'\s+', '_', cleaned)
|
cleaned = re.sub(r'\s+', '_', cleaned)
|
||||||
return cleaned if cleaned else "untitled_file"
|
return cleaned if cleaned else "untitled_file"
|
||||||
|
|
||||||
|
def strip_html_tags(html_text):
|
||||||
|
if not html_text: return ""
|
||||||
|
# First, unescape HTML entities
|
||||||
|
text = html.unescape(html_text)
|
||||||
|
# Then, remove HTML tags using a simple regex
|
||||||
|
# This is a basic approach and might not handle all complex HTML perfectly
|
||||||
|
clean_pattern = re.compile('<.*?>')
|
||||||
|
cleaned_text = re.sub(clean_pattern, '', text)
|
||||||
|
return cleaned_text.strip()
|
||||||
|
|
||||||
def extract_folder_name_from_title(title, unwanted_keywords):
|
def extract_folder_name_from_title(title, unwanted_keywords):
|
||||||
if not title: return 'Uncategorized'
|
if not title: return 'Uncategorized'
|
||||||
@@ -196,6 +227,31 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise RuntimeError(f"Unexpected error fetching offset {offset} ({paginated_url}): {e}")
|
raise RuntimeError(f"Unexpected error fetching offset {offset} ({paginated_url}): {e}")
|
||||||
|
|
||||||
|
def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None):
|
||||||
|
if cancellation_event and cancellation_event.is_set():
|
||||||
|
logger(" Comment fetch cancelled before request.")
|
||||||
|
raise RuntimeError("Comment fetch operation cancelled by user.")
|
||||||
|
|
||||||
|
comments_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}/comments"
|
||||||
|
logger(f" Fetching comments: {comments_api_url}")
|
||||||
|
try:
|
||||||
|
response = requests.get(comments_api_url, headers=headers, timeout=(10, 30)) # Shorter timeout for comments
|
||||||
|
response.raise_for_status()
|
||||||
|
if 'application/json' not in response.headers.get('Content-Type', '').lower():
|
||||||
|
logger(f"⚠️ Unexpected content type from comments API: {response.headers.get('Content-Type')}. Body: {response.text[:200]}")
|
||||||
|
return [] # Return empty list if not JSON
|
||||||
|
return response.json()
|
||||||
|
except requests.exceptions.Timeout:
|
||||||
|
raise RuntimeError(f"Timeout fetching comments for post {post_id} from {comments_api_url}")
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
err_msg = f"Error fetching comments for post {post_id} from {comments_api_url}: {e}"
|
||||||
|
if e.response is not None:
|
||||||
|
err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})"
|
||||||
|
raise RuntimeError(err_msg)
|
||||||
|
except ValueError as e: # JSONDecodeError inherits from ValueError
|
||||||
|
raise RuntimeError(f"Error decoding JSON from comments API for post {post_id} ({comments_api_url}): {e}. Response text: {response.text[:200]}")
|
||||||
|
except Exception as e:
|
||||||
|
raise RuntimeError(f"Unexpected error fetching comments for post {post_id} ({comments_api_url}): {e}")
|
||||||
|
|
||||||
def download_from_api(api_url_input, logger=print, start_page=None, end_page=None, manga_mode=False, cancellation_event=None):
|
def download_from_api(api_url_input, logger=print, start_page=None, end_page=None, manga_mode=False, cancellation_event=None):
|
||||||
headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}
|
headers = {'User-Agent': 'Mozilla/5.0', 'Accept': 'application/json'}
|
||||||
@@ -241,7 +297,7 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
|
|||||||
logger("✅ Reached end of posts (Manga Mode fetch all).")
|
logger("✅ Reached end of posts (Manga Mode fetch all).")
|
||||||
break
|
break
|
||||||
all_posts_for_manga_mode.extend(posts_batch_manga)
|
all_posts_for_manga_mode.extend(posts_batch_manga)
|
||||||
current_offset_manga += len(posts_batch_manga)
|
current_offset_manga += page_size # Increment by page_size for the next API call's 'o' parameter
|
||||||
time.sleep(0.6)
|
time.sleep(0.6)
|
||||||
except RuntimeError as e:
|
except RuntimeError as e:
|
||||||
if "cancelled by user" in str(e).lower():
|
if "cancelled by user" in str(e).lower():
|
||||||
@@ -328,7 +384,7 @@ def download_from_api(api_url_input, logger=print, start_page=None, end_page=Non
|
|||||||
if processed_target_post_flag:
|
if processed_target_post_flag:
|
||||||
break
|
break
|
||||||
|
|
||||||
current_offset += len(posts_batch)
|
current_offset += page_size # Increment by page_size for the next API call's 'o' parameter
|
||||||
current_page_num += 1
|
current_page_num += 1
|
||||||
time.sleep(0.6)
|
time.sleep(0.6)
|
||||||
|
|
||||||
@@ -366,7 +422,8 @@ class PostProcessorSignals(QObject):
|
|||||||
progress_signal = pyqtSignal(str)
|
progress_signal = pyqtSignal(str)
|
||||||
file_download_status_signal = pyqtSignal(bool)
|
file_download_status_signal = pyqtSignal(bool)
|
||||||
external_link_signal = pyqtSignal(str, str, str, str)
|
external_link_signal = pyqtSignal(str, str, str, str)
|
||||||
file_progress_signal = pyqtSignal(str, int, int)
|
file_progress_signal = pyqtSignal(str, object)
|
||||||
|
missed_character_post_signal = pyqtSignal(str, str) # New: post_title, reason
|
||||||
|
|
||||||
|
|
||||||
class PostProcessorWorker:
|
class PostProcessorWorker:
|
||||||
@@ -384,12 +441,14 @@ class PostProcessorWorker:
|
|||||||
num_file_threads=4, skip_current_file_flag=None,
|
num_file_threads=4, skip_current_file_flag=None,
|
||||||
manga_mode_active=False,
|
manga_mode_active=False,
|
||||||
manga_filename_style=STYLE_POST_TITLE,
|
manga_filename_style=STYLE_POST_TITLE,
|
||||||
char_filter_scope=CHAR_SCOPE_FILES
|
char_filter_scope=CHAR_SCOPE_FILES,
|
||||||
):
|
remove_from_filename_words_list=None,
|
||||||
|
allow_multipart_download=True,
|
||||||
|
): # Removed duplicate_file_mode and session-wide tracking
|
||||||
self.post = post_data
|
self.post = post_data
|
||||||
self.download_root = download_root
|
self.download_root = download_root
|
||||||
self.known_names = known_names
|
self.known_names = known_names
|
||||||
self.filter_character_list = filter_character_list if filter_character_list else []
|
self.filter_character_list_objects = filter_character_list if filter_character_list else []
|
||||||
self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else set()
|
self.unwanted_keywords = unwanted_keywords if unwanted_keywords is not None else set()
|
||||||
self.filter_mode = filter_mode
|
self.filter_mode = filter_mode
|
||||||
self.skip_zip = skip_zip
|
self.skip_zip = skip_zip
|
||||||
@@ -421,6 +480,9 @@ class PostProcessorWorker:
|
|||||||
self.manga_mode_active = manga_mode_active
|
self.manga_mode_active = manga_mode_active
|
||||||
self.manga_filename_style = manga_filename_style
|
self.manga_filename_style = manga_filename_style
|
||||||
self.char_filter_scope = char_filter_scope
|
self.char_filter_scope = char_filter_scope
|
||||||
|
self.remove_from_filename_words_list = remove_from_filename_words_list if remove_from_filename_words_list is not None else []
|
||||||
|
self.allow_multipart_download = allow_multipart_download
|
||||||
|
# self.duplicate_file_mode and session-wide tracking removed
|
||||||
|
|
||||||
if self.compress_images and Image is None:
|
if self.compress_images and Image is None:
|
||||||
self.logger("⚠️ Image compression disabled: Pillow library not found.")
|
self.logger("⚠️ Image compression disabled: Pillow library not found.")
|
||||||
@@ -439,14 +501,15 @@ class PostProcessorWorker:
|
|||||||
post_title="", file_index_in_post=0, num_files_in_this_post=1):
|
post_title="", file_index_in_post=0, num_files_in_this_post=1):
|
||||||
was_original_name_kept_flag = False
|
was_original_name_kept_flag = False
|
||||||
final_filename_saved_for_return = ""
|
final_filename_saved_for_return = ""
|
||||||
|
# target_folder_path is the base character/post folder.
|
||||||
|
|
||||||
if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False
|
if self.check_cancel() or (skip_event and skip_event.is_set()): return 0, 1, "", False
|
||||||
|
|
||||||
file_url = file_info.get('url')
|
file_url = file_info.get('url')
|
||||||
api_original_filename = file_info.get('_original_name_for_log', file_info.get('name'))
|
api_original_filename = file_info.get('_original_name_for_log', file_info.get('name'))
|
||||||
|
|
||||||
final_filename_saved_for_return = api_original_filename
|
# This is the ideal name for the file if it were to be saved in the main target_folder_path.
|
||||||
|
filename_to_save_in_main_path = ""
|
||||||
|
|
||||||
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_FILES or self.skip_words_scope == SKIP_SCOPE_BOTH):
|
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_FILES or self.skip_words_scope == SKIP_SCOPE_BOTH):
|
||||||
filename_to_check_for_skip_words = api_original_filename.lower()
|
filename_to_check_for_skip_words = api_original_filename.lower()
|
||||||
@@ -458,71 +521,55 @@ class PostProcessorWorker:
|
|||||||
original_filename_cleaned_base, original_ext = os.path.splitext(clean_filename(api_original_filename))
|
original_filename_cleaned_base, original_ext = os.path.splitext(clean_filename(api_original_filename))
|
||||||
if not original_ext.startswith('.'): original_ext = '.' + original_ext if original_ext else ''
|
if not original_ext.startswith('.'): original_ext = '.' + original_ext if original_ext else ''
|
||||||
|
|
||||||
filename_to_save = ""
|
if self.manga_mode_active: # Note: duplicate_file_mode is overridden to "Delete" in main.py if manga_mode is on
|
||||||
if self.manga_mode_active:
|
|
||||||
if self.manga_filename_style == STYLE_ORIGINAL_NAME:
|
if self.manga_filename_style == STYLE_ORIGINAL_NAME:
|
||||||
filename_to_save = clean_filename(api_original_filename)
|
filename_to_save_in_main_path = clean_filename(api_original_filename)
|
||||||
was_original_name_kept_flag = True
|
was_original_name_kept_flag = True
|
||||||
elif self.manga_filename_style == STYLE_POST_TITLE:
|
elif self.manga_filename_style == STYLE_POST_TITLE:
|
||||||
if post_title and post_title.strip():
|
if post_title and post_title.strip():
|
||||||
cleaned_post_title_base = clean_filename(post_title.strip())
|
cleaned_post_title_base = clean_filename(post_title.strip())
|
||||||
if num_files_in_this_post > 1:
|
if num_files_in_this_post > 1:
|
||||||
if file_index_in_post == 0:
|
if file_index_in_post == 0:
|
||||||
filename_to_save = f"{cleaned_post_title_base}{original_ext}"
|
filename_to_save_in_main_path = f"{cleaned_post_title_base}{original_ext}"
|
||||||
was_original_name_kept_flag = False
|
|
||||||
else:
|
else:
|
||||||
filename_to_save = clean_filename(api_original_filename)
|
filename_to_save_in_main_path = clean_filename(api_original_filename)
|
||||||
was_original_name_kept_flag = True
|
was_original_name_kept_flag = True
|
||||||
else:
|
else:
|
||||||
filename_to_save = f"{cleaned_post_title_base}{original_ext}"
|
filename_to_save_in_main_path = f"{cleaned_post_title_base}{original_ext}"
|
||||||
was_original_name_kept_flag = False
|
|
||||||
else:
|
else:
|
||||||
filename_to_save = clean_filename(api_original_filename)
|
filename_to_save_in_main_path = clean_filename(api_original_filename)
|
||||||
was_original_name_kept_flag = False
|
self.logger(f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save_in_main_path}'.")
|
||||||
self.logger(f"⚠️ Manga mode (Post Title Style): Post title missing for post {original_post_id_for_log}. Using cleaned original filename '{filename_to_save}'.")
|
|
||||||
else:
|
else:
|
||||||
self.logger(f"⚠️ Manga mode: Unknown filename style '{self.manga_filename_style}'. Defaulting to original filename for '{api_original_filename}'.")
|
self.logger(f"⚠️ Manga mode: Unknown filename style '{self.manga_filename_style}'. Defaulting to original filename for '{api_original_filename}'.")
|
||||||
filename_to_save = clean_filename(api_original_filename)
|
filename_to_save_in_main_path = clean_filename(api_original_filename)
|
||||||
|
|
||||||
|
if not filename_to_save_in_main_path:
|
||||||
|
filename_to_save_in_main_path = f"manga_file_{original_post_id_for_log}_{file_index_in_post + 1}{original_ext}"
|
||||||
|
self.logger(f"⚠️ Manga mode: Generated filename was empty. Using generic fallback: '{filename_to_save_in_main_path}'.")
|
||||||
|
was_original_name_kept_flag = False
|
||||||
|
else:
|
||||||
|
filename_to_save_in_main_path = clean_filename(api_original_filename)
|
||||||
was_original_name_kept_flag = False
|
was_original_name_kept_flag = False
|
||||||
|
|
||||||
if filename_to_save:
|
if self.remove_from_filename_words_list and filename_to_save_in_main_path:
|
||||||
counter = 1
|
base_name_for_removal, ext_for_removal = os.path.splitext(filename_to_save_in_main_path)
|
||||||
base_name_coll, ext_coll = os.path.splitext(filename_to_save)
|
modified_base_name = base_name_for_removal
|
||||||
temp_filename_for_collision_check = filename_to_save
|
for word_to_remove in self.remove_from_filename_words_list:
|
||||||
while os.path.exists(os.path.join(target_folder_path, temp_filename_for_collision_check)):
|
if not word_to_remove: continue
|
||||||
if self.manga_filename_style == STYLE_POST_TITLE and file_index_in_post == 0 and num_files_in_this_post > 1:
|
pattern = re.compile(re.escape(word_to_remove), re.IGNORECASE)
|
||||||
temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}"
|
modified_base_name = pattern.sub("", modified_base_name)
|
||||||
|
modified_base_name = re.sub(r'[_.\s-]+', '_', modified_base_name)
|
||||||
|
modified_base_name = modified_base_name.strip('_')
|
||||||
|
if modified_base_name and modified_base_name != ext_for_removal.lstrip('.'):
|
||||||
|
filename_to_save_in_main_path = modified_base_name + ext_for_removal
|
||||||
else:
|
else:
|
||||||
temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}"
|
filename_to_save_in_main_path = base_name_for_removal + ext_for_removal
|
||||||
counter += 1
|
|
||||||
if temp_filename_for_collision_check != filename_to_save:
|
|
||||||
filename_to_save = temp_filename_for_collision_check
|
|
||||||
else:
|
|
||||||
filename_to_save = f"manga_file_{original_post_id_for_log}_{file_index_in_post + 1}{original_ext}"
|
|
||||||
self.logger(f"⚠️ Manga mode: Generated filename was empty. Using generic fallback: '{filename_to_save}'.")
|
|
||||||
was_original_name_kept_flag = False
|
|
||||||
|
|
||||||
else:
|
|
||||||
filename_to_save = clean_filename(api_original_filename)
|
|
||||||
was_original_name_kept_flag = False
|
|
||||||
counter = 1
|
|
||||||
base_name_coll, ext_coll = os.path.splitext(filename_to_save)
|
|
||||||
temp_filename_for_collision_check = filename_to_save
|
|
||||||
while os.path.exists(os.path.join(target_folder_path, temp_filename_for_collision_check)):
|
|
||||||
temp_filename_for_collision_check = f"{base_name_coll}_{counter}{ext_coll}"
|
|
||||||
counter += 1
|
|
||||||
if temp_filename_for_collision_check != filename_to_save:
|
|
||||||
filename_to_save = temp_filename_for_collision_check
|
|
||||||
|
|
||||||
final_filename_for_sets_and_saving = filename_to_save
|
|
||||||
final_filename_saved_for_return = final_filename_for_sets_and_saving
|
|
||||||
|
|
||||||
if not self.download_thumbnails:
|
if not self.download_thumbnails:
|
||||||
is_img_type = is_image(api_original_filename)
|
is_img_type = is_image(api_original_filename)
|
||||||
is_vid_type = is_video(api_original_filename)
|
is_vid_type = is_video(api_original_filename)
|
||||||
is_archive_type = is_archive(api_original_filename)
|
is_archive_type = is_archive(api_original_filename)
|
||||||
|
|
||||||
|
|
||||||
if self.filter_mode == 'archive':
|
if self.filter_mode == 'archive':
|
||||||
if not is_archive_type:
|
if not is_archive_type:
|
||||||
self.logger(f" -> Filter Skip (Archive Mode): '{api_original_filename}' (Not an Archive).")
|
self.logger(f" -> Filter Skip (Archive Mode): '{api_original_filename}' (Not an Archive).")
|
||||||
@@ -543,19 +590,31 @@ class PostProcessorWorker:
|
|||||||
self.logger(f" -> Pref Skip: '{api_original_filename}' (RAR).")
|
self.logger(f" -> Pref Skip: '{api_original_filename}' (RAR).")
|
||||||
return 0, 1, api_original_filename, False
|
return 0, 1, api_original_filename, False
|
||||||
|
|
||||||
target_folder_basename = os.path.basename(target_folder_path)
|
# --- Pre-Download Duplicate Handling (Standard Mode Only - Manga mode has its own suffixing) ---
|
||||||
current_save_path = os.path.join(target_folder_path, final_filename_for_sets_and_saving)
|
if not self.manga_mode_active:
|
||||||
|
path_in_main_folder_check = os.path.join(target_folder_path, filename_to_save_in_main_path)
|
||||||
if os.path.exists(current_save_path) and os.path.getsize(current_save_path) > 0:
|
is_duplicate_by_path = os.path.exists(path_in_main_folder_check) and \
|
||||||
self.logger(f" -> Exists (Path): '{final_filename_for_sets_and_saving}' in '{target_folder_basename}'.")
|
os.path.getsize(path_in_main_folder_check) > 0
|
||||||
with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving)
|
|
||||||
return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag
|
|
||||||
|
|
||||||
|
is_duplicate_by_session_name = False
|
||||||
with self.downloaded_files_lock:
|
with self.downloaded_files_lock:
|
||||||
if final_filename_for_sets_and_saving in self.downloaded_files:
|
if filename_to_save_in_main_path in self.downloaded_files:
|
||||||
self.logger(f" -> Global Skip (Filename): '{final_filename_for_sets_and_saving}' already recorded this session.")
|
is_duplicate_by_session_name = True
|
||||||
return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag
|
|
||||||
|
|
||||||
|
if is_duplicate_by_path or is_duplicate_by_session_name:
|
||||||
|
reason = "Path Exists" if is_duplicate_by_path else "Session Name"
|
||||||
|
self.logger(f" -> Skip Duplicate ({reason}, Pre-DL): '{filename_to_save_in_main_path}'. Skipping download.")
|
||||||
|
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Mark as processed
|
||||||
|
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
|
||||||
|
|
||||||
|
# Ensure base target folder exists (used for .part file with multipart)
|
||||||
|
try:
|
||||||
|
os.makedirs(target_folder_path, exist_ok=True) # For .part file
|
||||||
|
except OSError as e:
|
||||||
|
self.logger(f" ❌ Critical error creating directory '{target_folder_path}': {e}. Skipping file '{api_original_filename}'.")
|
||||||
|
return 0, 1, api_original_filename, False
|
||||||
|
|
||||||
|
# --- Download Attempt ---
|
||||||
max_retries = 3
|
max_retries = 3
|
||||||
retry_delay = 5
|
retry_delay = 5
|
||||||
downloaded_size_bytes = 0
|
downloaded_size_bytes = 0
|
||||||
@@ -564,62 +623,73 @@ class PostProcessorWorker:
|
|||||||
total_size_bytes = 0
|
total_size_bytes = 0
|
||||||
download_successful_flag = False
|
download_successful_flag = False
|
||||||
|
|
||||||
for attempt_num in range(max_retries + 1):
|
for attempt_num_single_stream in range(max_retries + 1):
|
||||||
if self.check_cancel() or (skip_event and skip_event.is_set()):
|
if self.check_cancel() or (skip_event and skip_event.is_set()): break
|
||||||
break
|
|
||||||
try:
|
try:
|
||||||
if attempt_num > 0:
|
if attempt_num_single_stream > 0:
|
||||||
self.logger(f" Retrying '{api_original_filename}' (Attempt {attempt_num}/{max_retries})...")
|
self.logger(f" Retrying download for '{api_original_filename}' (Overall Attempt {attempt_num_single_stream + 1}/{max_retries + 1})...")
|
||||||
time.sleep(retry_delay * (2**(attempt_num - 1)))
|
time.sleep(retry_delay * (2**(attempt_num_single_stream - 1)))
|
||||||
|
|
||||||
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
|
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
|
||||||
self.signals.file_download_status_signal.emit(True)
|
self.signals.file_download_status_signal.emit(True)
|
||||||
|
|
||||||
response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True)
|
response = requests.get(file_url, headers=headers, timeout=(15, 300), stream=True)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
total_size_bytes = int(response.headers.get('Content-Length', 0))
|
||||||
|
|
||||||
current_total_size_bytes_from_headers = int(response.headers.get('Content-Length', 0))
|
num_parts_for_file = min(self.num_file_threads, MAX_PARTS_FOR_MULTIPART_DOWNLOAD)
|
||||||
|
attempt_multipart = (self.allow_multipart_download and MULTIPART_DOWNLOADER_AVAILABLE and
|
||||||
|
num_parts_for_file > 1 and total_size_bytes > MIN_SIZE_FOR_MULTIPART_DOWNLOAD and
|
||||||
|
'bytes' in response.headers.get('Accept-Ranges', '').lower())
|
||||||
|
|
||||||
if attempt_num == 0:
|
if attempt_multipart:
|
||||||
total_size_bytes = current_total_size_bytes_from_headers
|
response.close()
|
||||||
size_str = f"{total_size_bytes / (1024 * 1024):.2f} MB" if total_size_bytes > 0 else "unknown size"
|
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
|
||||||
self.logger(f"⬇️ Downloading: '{api_original_filename}' (Size: {size_str}) [Saving as: '{final_filename_for_sets_and_saving}']")
|
self.signals.file_download_status_signal.emit(False)
|
||||||
|
|
||||||
current_attempt_total_size = total_size_bytes
|
# .part file is always based on the main target_folder_path and filename_to_save_in_main_path
|
||||||
|
mp_save_path_base_for_part = os.path.join(target_folder_path, filename_to_save_in_main_path)
|
||||||
|
mp_success, mp_bytes, mp_hash, mp_file_handle = download_file_in_parts(
|
||||||
|
file_url, mp_save_path_base_for_part, total_size_bytes, num_parts_for_file, headers,
|
||||||
|
api_original_filename, self.signals, self.cancellation_event, skip_event, self.logger
|
||||||
|
)
|
||||||
|
if mp_success:
|
||||||
|
download_successful_flag = True
|
||||||
|
downloaded_size_bytes = mp_bytes
|
||||||
|
calculated_file_hash = mp_hash
|
||||||
|
file_content_bytes = mp_file_handle
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
if attempt_num_single_stream < max_retries:
|
||||||
|
self.logger(f" Multi-part download attempt failed for '{api_original_filename}'. Retrying with single stream.")
|
||||||
|
else:
|
||||||
|
download_successful_flag = False; break
|
||||||
|
|
||||||
|
self.logger(f"⬇️ Downloading (Single Stream): '{api_original_filename}' (Size: {total_size_bytes / (1024*1024):.2f} MB if known) [Base Name: '{filename_to_save_in_main_path}']")
|
||||||
file_content_buffer = BytesIO()
|
file_content_buffer = BytesIO()
|
||||||
current_attempt_downloaded_bytes = 0
|
current_attempt_downloaded_bytes = 0
|
||||||
md5_hasher = hashlib.md5()
|
md5_hasher = hashlib.md5()
|
||||||
last_progress_time = time.time()
|
last_progress_time = time.time()
|
||||||
|
|
||||||
for chunk in response.iter_content(chunk_size=1 * 1024 * 1024):
|
for chunk in response.iter_content(chunk_size=1 * 1024 * 1024):
|
||||||
if self.check_cancel() or (skip_event and skip_event.is_set()):
|
if self.check_cancel() or (skip_event and skip_event.is_set()): break
|
||||||
break
|
|
||||||
if chunk:
|
if chunk:
|
||||||
file_content_buffer.write(chunk)
|
file_content_buffer.write(chunk); md5_hasher.update(chunk)
|
||||||
md5_hasher.update(chunk)
|
|
||||||
current_attempt_downloaded_bytes += len(chunk)
|
current_attempt_downloaded_bytes += len(chunk)
|
||||||
if time.time() - last_progress_time > 1 and current_attempt_total_size > 0 and \
|
if time.time() - last_progress_time > 1 and total_size_bytes > 0 and \
|
||||||
self.signals and hasattr(self.signals, 'file_progress_signal'):
|
self.signals and hasattr(self.signals, 'file_progress_signal'):
|
||||||
self.signals.file_progress_signal.emit(
|
self.signals.file_progress_signal.emit(api_original_filename, (current_attempt_downloaded_bytes, total_size_bytes))
|
||||||
api_original_filename,
|
|
||||||
current_attempt_downloaded_bytes,
|
|
||||||
current_attempt_total_size
|
|
||||||
)
|
|
||||||
last_progress_time = time.time()
|
last_progress_time = time.time()
|
||||||
|
|
||||||
if self.check_cancel() or (skip_event and skip_event.is_set()):
|
if self.check_cancel() or (skip_event and skip_event.is_set()):
|
||||||
if file_content_buffer: file_content_buffer.close()
|
if file_content_buffer: file_content_buffer.close(); break
|
||||||
break
|
|
||||||
|
|
||||||
if current_attempt_downloaded_bytes > 0 or (current_attempt_total_size == 0 and response.status_code == 200):
|
if current_attempt_downloaded_bytes > 0 or (total_size_bytes == 0 and response.status_code == 200):
|
||||||
calculated_file_hash = md5_hasher.hexdigest()
|
calculated_file_hash = md5_hasher.hexdigest()
|
||||||
downloaded_size_bytes = current_attempt_downloaded_bytes
|
downloaded_size_bytes = current_attempt_downloaded_bytes
|
||||||
if file_content_bytes: file_content_bytes.close()
|
if file_content_bytes: file_content_bytes.close()
|
||||||
file_content_bytes = file_content_buffer
|
file_content_bytes = file_content_buffer; file_content_bytes.seek(0)
|
||||||
file_content_bytes.seek(0)
|
download_successful_flag = True; break
|
||||||
download_successful_flag = True
|
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
if file_content_buffer: file_content_buffer.close()
|
if file_content_buffer: file_content_buffer.close()
|
||||||
|
|
||||||
@@ -628,97 +698,154 @@ class PostProcessorWorker:
|
|||||||
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close()
|
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close()
|
||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
self.logger(f" ❌ Download Error (Non-Retryable): {api_original_filename}. Error: {e}")
|
self.logger(f" ❌ Download Error (Non-Retryable): {api_original_filename}. Error: {e}")
|
||||||
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close()
|
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close(); break
|
||||||
break
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger(f" ❌ Unexpected Download Error: {api_original_filename}: {e}\n{traceback.format_exc(limit=2)}")
|
self.logger(f" ❌ Unexpected Download Error: {api_original_filename}: {e}\n{traceback.format_exc(limit=2)}")
|
||||||
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close()
|
if 'file_content_buffer' in locals() and file_content_buffer: file_content_buffer.close(); break
|
||||||
break
|
|
||||||
finally:
|
finally:
|
||||||
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
|
if self.signals and hasattr(self.signals, 'file_download_status_signal'):
|
||||||
self.signals.file_download_status_signal.emit(False)
|
self.signals.file_download_status_signal.emit(False)
|
||||||
|
|
||||||
if self.signals and hasattr(self.signals, 'file_progress_signal'):
|
if self.signals and hasattr(self.signals, 'file_progress_signal'):
|
||||||
final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes
|
final_total_for_progress = total_size_bytes if download_successful_flag and total_size_bytes > 0 else downloaded_size_bytes
|
||||||
self.signals.file_progress_signal.emit(api_original_filename, downloaded_size_bytes, final_total_for_progress)
|
self.signals.file_progress_signal.emit(api_original_filename, (downloaded_size_bytes, final_total_for_progress))
|
||||||
|
|
||||||
if self.check_cancel() or (skip_event and skip_event.is_set()):
|
if self.check_cancel() or (skip_event and skip_event.is_set()):
|
||||||
self.logger(f" ⚠️ Download interrupted for {api_original_filename}.")
|
self.logger(f" ⚠️ Download process interrupted for {api_original_filename}.")
|
||||||
if file_content_bytes: file_content_bytes.close()
|
if file_content_bytes: file_content_bytes.close()
|
||||||
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag
|
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
|
||||||
|
|
||||||
if not download_successful_flag:
|
if not download_successful_flag:
|
||||||
self.logger(f"❌ Download failed for '{api_original_filename}' after {max_retries + 1} attempts.")
|
self.logger(f"❌ Download failed for '{api_original_filename}' after {max_retries + 1} attempts.")
|
||||||
if file_content_bytes: file_content_bytes.close()
|
if file_content_bytes: file_content_bytes.close()
|
||||||
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag
|
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
|
||||||
|
|
||||||
|
# --- Universal Post-Download Hash Check ---
|
||||||
with self.downloaded_file_hashes_lock:
|
with self.downloaded_file_hashes_lock:
|
||||||
if calculated_file_hash in self.downloaded_file_hashes:
|
if calculated_file_hash in self.downloaded_file_hashes:
|
||||||
self.logger(f" -> Content Skip (Hash): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...) already downloaded this session.")
|
self.logger(f" -> Skip Saving Duplicate (Hash Match): '{api_original_filename}' (Hash: {calculated_file_hash[:8]}...).")
|
||||||
with self.downloaded_files_lock: self.downloaded_files.add(final_filename_for_sets_and_saving)
|
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Mark logical name
|
||||||
if file_content_bytes: file_content_bytes.close()
|
if file_content_bytes: file_content_bytes.close()
|
||||||
return 0, 1, final_filename_for_sets_and_saving, was_original_name_kept_flag
|
# If it was a multipart download, its .part file needs cleanup
|
||||||
|
if not isinstance(file_content_bytes, BytesIO): # Indicates multipart download
|
||||||
|
part_file_to_remove = os.path.join(target_folder_path, filename_to_save_in_main_path + ".part")
|
||||||
|
if os.path.exists(part_file_to_remove):
|
||||||
|
try: os.remove(part_file_to_remove);
|
||||||
|
except OSError: self.logger(f" -> Failed to remove .part file for hash duplicate: {part_file_to_remove}")
|
||||||
|
return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag
|
||||||
|
|
||||||
bytes_to_write = file_content_bytes
|
# --- Determine Save Location and Final Filename ---
|
||||||
final_filename_after_processing = final_filename_for_sets_and_saving
|
effective_save_folder = target_folder_path # Default: main character/post folder
|
||||||
current_save_path_final = current_save_path
|
# filename_to_save_in_main_path is the logical name after cleaning, manga styling, word removal
|
||||||
|
filename_after_styling_and_word_removal = filename_to_save_in_main_path
|
||||||
|
|
||||||
|
# "Move" logic and "Duplicate" subfolder logic removed.
|
||||||
|
# effective_save_folder will always be target_folder_path.
|
||||||
|
|
||||||
|
try: # Ensure the chosen save folder (main or Duplicate) exists
|
||||||
|
os.makedirs(effective_save_folder, exist_ok=True)
|
||||||
|
except OSError as e:
|
||||||
|
self.logger(f" ❌ Critical error creating directory '{effective_save_folder}': {e}. Skipping file '{api_original_filename}'.")
|
||||||
|
if file_content_bytes: file_content_bytes.close()
|
||||||
|
# Cleanup .part file if multipart
|
||||||
|
if not isinstance(file_content_bytes, BytesIO):
|
||||||
|
part_file_to_remove = os.path.join(target_folder_path, filename_to_save_in_main_path + ".part")
|
||||||
|
if os.path.exists(part_file_to_remove): os.remove(part_file_to_remove)
|
||||||
|
return 0, 1, api_original_filename, False
|
||||||
|
|
||||||
|
# --- Image Compression ---
|
||||||
|
# This operates on file_content_bytes (which is BytesIO or a file handle from multipart)
|
||||||
|
# It might change filename_after_styling_and_word_removal's extension (e.g., .jpg to .webp)
|
||||||
|
# and returns new data_to_write_after_compression (BytesIO) or original file_content_bytes.
|
||||||
|
data_to_write_after_compression = file_content_bytes
|
||||||
|
filename_after_compression = filename_after_styling_and_word_removal
|
||||||
|
|
||||||
is_img_for_compress_check = is_image(api_original_filename)
|
is_img_for_compress_check = is_image(api_original_filename)
|
||||||
if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024):
|
if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024):
|
||||||
self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024*1024):.2f} MB)...")
|
self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024*1024):.2f} MB)...")
|
||||||
try:
|
try:
|
||||||
bytes_to_write.seek(0)
|
file_content_bytes.seek(0)
|
||||||
with Image.open(bytes_to_write) as img_obj:
|
with Image.open(file_content_bytes) as img_obj:
|
||||||
if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA')
|
if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA')
|
||||||
elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB')
|
elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB')
|
||||||
|
|
||||||
compressed_bytes_io = BytesIO()
|
compressed_bytes_io = BytesIO()
|
||||||
img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4)
|
img_obj.save(compressed_bytes_io, format='WebP', quality=80, method=4)
|
||||||
compressed_size = compressed_bytes_io.getbuffer().nbytes
|
compressed_size = compressed_bytes_io.getbuffer().nbytes
|
||||||
|
|
||||||
if compressed_size < downloaded_size_bytes * 0.9:
|
if compressed_size < downloaded_size_bytes * 0.9: # If significantly smaller
|
||||||
self.logger(f" Compression success: {compressed_size / (1024*1024):.2f} MB.")
|
self.logger(f" Compression success: {compressed_size / (1024*1024):.2f} MB.")
|
||||||
bytes_to_write.close()
|
data_to_write_after_compression = compressed_bytes_io; data_to_write_after_compression.seek(0)
|
||||||
bytes_to_write = compressed_bytes_io
|
base_name_orig, _ = os.path.splitext(filename_after_compression)
|
||||||
bytes_to_write.seek(0)
|
filename_after_compression = base_name_orig + '.webp'
|
||||||
|
self.logger(f" Updated filename (compressed): {filename_after_compression}")
|
||||||
base_name_orig, _ = os.path.splitext(final_filename_for_sets_and_saving)
|
|
||||||
final_filename_after_processing = base_name_orig + '.webp'
|
|
||||||
current_save_path_final = os.path.join(target_folder_path, final_filename_after_processing)
|
|
||||||
self.logger(f" Updated filename (compressed): {final_filename_after_processing}")
|
|
||||||
else:
|
else:
|
||||||
self.logger(f" Compression skipped: WebP not significantly smaller."); bytes_to_write.seek(0)
|
self.logger(f" Compression skipped: WebP not significantly smaller."); file_content_bytes.seek(0) # Reset original stream
|
||||||
|
data_to_write_after_compression = file_content_bytes # Use original
|
||||||
except Exception as comp_e:
|
except Exception as comp_e:
|
||||||
self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); bytes_to_write.seek(0)
|
self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original."); file_content_bytes.seek(0)
|
||||||
|
data_to_write_after_compression = file_content_bytes # Use original
|
||||||
|
|
||||||
final_filename_saved_for_return = final_filename_after_processing
|
# --- Final Numeric Suffixing in the effective_save_folder ---
|
||||||
|
final_filename_on_disk = filename_after_compression # This is the name after potential compression
|
||||||
|
temp_base, temp_ext = os.path.splitext(final_filename_on_disk)
|
||||||
|
suffix_counter = 1
|
||||||
|
while os.path.exists(os.path.join(effective_save_folder, final_filename_on_disk)):
|
||||||
|
final_filename_on_disk = f"{temp_base}_{suffix_counter}{temp_ext}"
|
||||||
|
suffix_counter += 1
|
||||||
|
|
||||||
if final_filename_after_processing != final_filename_for_sets_and_saving and \
|
if final_filename_on_disk != filename_after_compression:
|
||||||
os.path.exists(current_save_path_final) and os.path.getsize(current_save_path_final) > 0:
|
self.logger(f" Applied numeric suffix in '{os.path.basename(effective_save_folder)}': '{final_filename_on_disk}' (was '{filename_after_compression}')")
|
||||||
self.logger(f" -> Exists (Path - Post-Compress): '{final_filename_after_processing}' in '{target_folder_basename}'.")
|
|
||||||
with self.downloaded_files_lock: self.downloaded_files.add(final_filename_after_processing)
|
# --- Save File ---
|
||||||
bytes_to_write.close()
|
final_save_path = os.path.join(effective_save_folder, final_filename_on_disk)
|
||||||
return 0, 1, final_filename_after_processing, was_original_name_kept_flag
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
os.makedirs(os.path.dirname(current_save_path_final), exist_ok=True)
|
# data_to_write_after_compression is BytesIO (single stream, or compressed multipart)
|
||||||
with open(current_save_path_final, 'wb') as f_out:
|
# OR it's the original file_content_bytes (which is a file handle if uncompressed multipart)
|
||||||
f_out.write(bytes_to_write.getvalue())
|
|
||||||
|
if data_to_write_after_compression is file_content_bytes and not isinstance(file_content_bytes, BytesIO):
|
||||||
|
# This means uncompressed multipart download. Original .part file handle is file_content_bytes.
|
||||||
|
# The .part file is at target_folder_path/filename_to_save_in_main_path.part
|
||||||
|
original_part_file_actual_path = file_content_bytes.name
|
||||||
|
file_content_bytes.close() # Close handle first
|
||||||
|
os.rename(original_part_file_actual_path, final_save_path)
|
||||||
|
self.logger(f" Renamed .part file to final: {final_save_path}")
|
||||||
|
else: # Single stream download, or compressed multipart. Write from BytesIO.
|
||||||
|
with open(final_save_path, 'wb') as f_out:
|
||||||
|
f_out.write(data_to_write_after_compression.getvalue())
|
||||||
|
|
||||||
|
# If original was multipart and then compressed, clean up original .part file
|
||||||
|
if data_to_write_after_compression is not file_content_bytes and not isinstance(file_content_bytes, BytesIO):
|
||||||
|
original_part_file_actual_path = file_content_bytes.name
|
||||||
|
file_content_bytes.close()
|
||||||
|
if os.path.exists(original_part_file_actual_path):
|
||||||
|
try: os.remove(original_part_file_actual_path)
|
||||||
|
except OSError as e_rem: self.logger(f" -> Failed to remove .part after compression: {e_rem}")
|
||||||
|
|
||||||
with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash)
|
with self.downloaded_file_hashes_lock: self.downloaded_file_hashes.add(calculated_file_hash)
|
||||||
with self.downloaded_files_lock: self.downloaded_files.add(final_filename_after_processing)
|
with self.downloaded_files_lock: self.downloaded_files.add(filename_to_save_in_main_path) # Track by logical name
|
||||||
|
|
||||||
self.logger(f"✅ Saved: '{final_filename_after_processing}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{target_folder_basename}'")
|
final_filename_saved_for_return = final_filename_on_disk
|
||||||
|
self.logger(f"✅ Saved: '{final_filename_saved_for_return}' (from '{api_original_filename}', {downloaded_size_bytes / (1024*1024):.2f} MB) in '{os.path.basename(effective_save_folder)}'")
|
||||||
|
# Session-wide base name tracking removed.
|
||||||
time.sleep(0.05)
|
time.sleep(0.05)
|
||||||
return 1, 0, final_filename_after_processing, was_original_name_kept_flag
|
return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag
|
||||||
except Exception as save_err:
|
except Exception as save_err:
|
||||||
self.logger(f"❌ Save Fail for '{final_filename_after_processing}': {save_err}")
|
self.logger(f"❌ Save Fail for '{final_filename_on_disk}': {save_err}")
|
||||||
if os.path.exists(current_save_path_final):
|
if os.path.exists(final_save_path):
|
||||||
try: os.remove(current_save_path_final);
|
try: os.remove(final_save_path);
|
||||||
except OSError: self.logger(f" -> Failed to remove partially saved file: {current_save_path_final}")
|
except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}")
|
||||||
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag
|
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag
|
||||||
finally:
|
finally:
|
||||||
if bytes_to_write: bytes_to_write.close()
|
# Ensure all handles are closed
|
||||||
|
if data_to_write_after_compression and hasattr(data_to_write_after_compression, 'close'):
|
||||||
|
data_to_write_after_compression.close()
|
||||||
|
# If original file_content_bytes was a different handle (e.g. multipart before compression) and not closed yet
|
||||||
|
if file_content_bytes and file_content_bytes is not data_to_write_after_compression and hasattr(file_content_bytes, 'close'):
|
||||||
|
try:
|
||||||
|
if not file_content_bytes.closed: # Check if already closed
|
||||||
|
file_content_bytes.close()
|
||||||
|
except Exception: pass # Ignore errors on close if already handled
|
||||||
|
|
||||||
|
|
||||||
def process(self):
|
def process(self):
|
||||||
@@ -748,31 +875,160 @@ class PostProcessorWorker:
|
|||||||
|
|
||||||
post_is_candidate_by_title_char_match = False
|
post_is_candidate_by_title_char_match = False
|
||||||
char_filter_that_matched_title = None
|
char_filter_that_matched_title = None
|
||||||
|
post_is_candidate_by_comment_char_match = False
|
||||||
|
# New variables for CHAR_SCOPE_COMMENTS file-first logic
|
||||||
|
post_is_candidate_by_file_char_match_in_comment_scope = False
|
||||||
|
char_filter_that_matched_file_in_comment_scope = None
|
||||||
|
char_filter_that_matched_comment = None
|
||||||
|
|
||||||
if self.filter_character_list and \
|
if self.filter_character_list_objects and \
|
||||||
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH):
|
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH):
|
||||||
for char_name in self.filter_character_list:
|
# self.logger(f" [Debug Title Match] Checking post title '{post_title}' against {len(self.filter_character_list_objects)} filter objects. Scope: {self.char_filter_scope}")
|
||||||
if is_title_match_for_character(post_title, char_name):
|
for idx, filter_item_obj in enumerate(self.filter_character_list_objects):
|
||||||
post_is_candidate_by_title_char_match = True
|
if self.check_cancel(): break
|
||||||
char_filter_that_matched_title = char_name
|
# self.logger(f" [Debug Title Match] Filter obj #{idx}: {filter_item_obj}")
|
||||||
self.logger(f" Post title matches char filter '{char_name}' (Scope: {self.char_filter_scope}). Post is candidate.")
|
terms_to_check_for_title = list(filter_item_obj["aliases"])
|
||||||
break
|
if filter_item_obj["is_group"]:
|
||||||
|
if filter_item_obj["name"] not in terms_to_check_for_title:
|
||||||
|
terms_to_check_for_title.append(filter_item_obj["name"])
|
||||||
|
|
||||||
if self.filter_character_list and self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match:
|
unique_terms_for_title_check = list(set(terms_to_check_for_title))
|
||||||
|
# self.logger(f" [Debug Title Match] Unique terms for this filter obj: {unique_terms_for_title_check}")
|
||||||
|
|
||||||
|
for term_to_match in unique_terms_for_title_check:
|
||||||
|
# self.logger(f" [Debug Title Match] Checking term: '{term_to_match}'")
|
||||||
|
match_found_for_term = is_title_match_for_character(post_title, term_to_match)
|
||||||
|
# self.logger(f" [Debug Title Match] Result for '{term_to_match}': {match_found_for_term}")
|
||||||
|
if match_found_for_term:
|
||||||
|
post_is_candidate_by_title_char_match = True
|
||||||
|
char_filter_that_matched_title = filter_item_obj
|
||||||
|
self.logger(f" Post title matches char filter term '{term_to_match}' (from group/name '{filter_item_obj['name']}', Scope: {self.char_filter_scope}). Post is candidate.")
|
||||||
|
break
|
||||||
|
if post_is_candidate_by_title_char_match: break
|
||||||
|
# self.logger(f" [Debug Title Match] Final post_is_candidate_by_title_char_match: {post_is_candidate_by_title_char_match}")
|
||||||
|
|
||||||
|
# --- Populate all_files_from_post_api before character filter logic that needs it ---
|
||||||
|
# This is needed for the file-first check in CHAR_SCOPE_COMMENTS
|
||||||
|
all_files_from_post_api_for_char_check = []
|
||||||
|
api_file_domain_for_char_check = urlparse(self.api_url_input).netloc
|
||||||
|
if not api_file_domain_for_char_check or not any(d in api_file_domain_for_char_check.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
|
||||||
|
api_file_domain_for_char_check = "kemono.su" if "kemono" in self.service.lower() else "coomer.party"
|
||||||
|
|
||||||
|
if post_main_file_info and isinstance(post_main_file_info, dict) and post_main_file_info.get('path'):
|
||||||
|
original_api_name = post_main_file_info.get('name') or os.path.basename(post_main_file_info['path'].lstrip('/'))
|
||||||
|
if original_api_name:
|
||||||
|
all_files_from_post_api_for_char_check.append({'_original_name_for_log': original_api_name})
|
||||||
|
|
||||||
|
for att_info in post_attachments:
|
||||||
|
if isinstance(att_info, dict) and att_info.get('path'):
|
||||||
|
original_api_att_name = att_info.get('name') or os.path.basename(att_info['path'].lstrip('/'))
|
||||||
|
if original_api_att_name:
|
||||||
|
all_files_from_post_api_for_char_check.append({'_original_name_for_log': original_api_att_name})
|
||||||
|
# --- End population of all_files_from_post_api_for_char_check ---
|
||||||
|
|
||||||
|
|
||||||
|
if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
|
||||||
|
self.logger(f" [Char Scope: Comments] Phase 1: Checking post files for matches before comments for post ID '{post_id}'.")
|
||||||
|
for file_info_item in all_files_from_post_api_for_char_check: # Use the pre-populated list of file names
|
||||||
|
if self.check_cancel(): break
|
||||||
|
current_api_original_filename_for_check = file_info_item.get('_original_name_for_log')
|
||||||
|
if not current_api_original_filename_for_check: continue
|
||||||
|
|
||||||
|
for filter_item_obj in self.filter_character_list_objects:
|
||||||
|
terms_to_check = list(filter_item_obj["aliases"])
|
||||||
|
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check:
|
||||||
|
terms_to_check.append(filter_item_obj["name"])
|
||||||
|
|
||||||
|
for term_to_match in terms_to_check:
|
||||||
|
if is_filename_match_for_character(current_api_original_filename_for_check, term_to_match):
|
||||||
|
post_is_candidate_by_file_char_match_in_comment_scope = True
|
||||||
|
char_filter_that_matched_file_in_comment_scope = filter_item_obj
|
||||||
|
self.logger(f" Match Found (File in Comments Scope): File '{current_api_original_filename_for_check}' matches char filter term '{term_to_match}' (from group/name '{filter_item_obj['name']}'). Post is candidate.")
|
||||||
|
break
|
||||||
|
if post_is_candidate_by_file_char_match_in_comment_scope: break
|
||||||
|
if post_is_candidate_by_file_char_match_in_comment_scope: break
|
||||||
|
self.logger(f" [Char Scope: Comments] Phase 1 Result: post_is_candidate_by_file_char_match_in_comment_scope = {post_is_candidate_by_file_char_match_in_comment_scope}")
|
||||||
|
|
||||||
|
if self.filter_character_list_objects and self.char_filter_scope == CHAR_SCOPE_COMMENTS:
|
||||||
|
if not post_is_candidate_by_file_char_match_in_comment_scope:
|
||||||
|
self.logger(f" [Char Scope: Comments] Phase 2: No file match found. Checking post comments for post ID '{post_id}'.")
|
||||||
|
try:
|
||||||
|
parsed_input_url_for_comments = urlparse(self.api_url_input)
|
||||||
|
api_domain_for_comments = parsed_input_url_for_comments.netloc
|
||||||
|
if not any(d in api_domain_for_comments.lower() for d in ['kemono.su', 'kemono.party', 'coomer.su', 'coomer.party']):
|
||||||
|
self.logger(f"⚠️ Unrecognized domain '{api_domain_for_comments}' for comment API. Defaulting based on service.")
|
||||||
|
api_domain_for_comments = "kemono.su" if "kemono" in self.service.lower() else "coomer.party"
|
||||||
|
|
||||||
|
comments_data = fetch_post_comments(
|
||||||
|
api_domain_for_comments, self.service, self.user_id, post_id,
|
||||||
|
headers, self.logger, self.cancellation_event
|
||||||
|
)
|
||||||
|
if comments_data:
|
||||||
|
self.logger(f" Fetched {len(comments_data)} comments for post {post_id}.")
|
||||||
|
for comment_item_idx, comment_item in enumerate(comments_data):
|
||||||
|
if self.check_cancel(): break
|
||||||
|
raw_comment_content = comment_item.get('content', '')
|
||||||
|
if not raw_comment_content: continue
|
||||||
|
|
||||||
|
cleaned_comment_text = strip_html_tags(raw_comment_content)
|
||||||
|
if not cleaned_comment_text.strip(): continue
|
||||||
|
|
||||||
|
for filter_item_obj in self.filter_character_list_objects:
|
||||||
|
terms_to_check_comment = list(filter_item_obj["aliases"])
|
||||||
|
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_comment:
|
||||||
|
terms_to_check_comment.append(filter_item_obj["name"])
|
||||||
|
|
||||||
|
for term_to_match_comment in terms_to_check_comment:
|
||||||
|
if is_title_match_for_character(cleaned_comment_text, term_to_match_comment): # Re-use title matcher
|
||||||
|
post_is_candidate_by_comment_char_match = True
|
||||||
|
char_filter_that_matched_comment = filter_item_obj
|
||||||
|
self.logger(f" Match Found (Comment in Comments Scope): Comment in post {post_id} matches char filter term '{term_to_match_comment}' (from group/name '{filter_item_obj['name']}'). Post is candidate.")
|
||||||
|
self.logger(f" Matching comment (first 100 chars): '{cleaned_comment_text[:100]}...'")
|
||||||
|
break
|
||||||
|
if post_is_candidate_by_comment_char_match: break
|
||||||
|
if post_is_candidate_by_comment_char_match: break
|
||||||
|
else:
|
||||||
|
self.logger(f" No comments found or fetched for post {post_id} to check against character filters.")
|
||||||
|
|
||||||
|
except RuntimeError as e_fetch_comment:
|
||||||
|
self.logger(f" ⚠️ Error fetching or processing comments for post {post_id}: {e_fetch_comment}")
|
||||||
|
except Exception as e_generic_comment:
|
||||||
|
self.logger(f" ❌ Unexpected error during comment processing for post {post_id}: {e_generic_comment}\n{traceback.format_exc(limit=2)}")
|
||||||
|
self.logger(f" [Char Scope: Comments] Phase 2 Result: post_is_candidate_by_comment_char_match = {post_is_candidate_by_comment_char_match}")
|
||||||
|
else: # post_is_candidate_by_file_char_match_in_comment_scope was True
|
||||||
|
self.logger(f" [Char Scope: Comments] Phase 2: Skipped comment check for post ID '{post_id}' because a file match already made it a candidate.")
|
||||||
|
|
||||||
|
# --- Skip Post Logic based on Title or Comment Scope (if filters are active) ---
|
||||||
|
if self.filter_character_list_objects:
|
||||||
|
if self.char_filter_scope == CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match:
|
||||||
self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.")
|
self.logger(f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title[:50]}' does not match character filters.")
|
||||||
|
if self.signals and hasattr(self.signals, 'missed_character_post_signal'):
|
||||||
|
self.signals.missed_character_post_signal.emit(post_title, "No title match for character filter")
|
||||||
|
return 0, num_potential_files_in_post, []
|
||||||
|
if self.char_filter_scope == CHAR_SCOPE_COMMENTS and \
|
||||||
|
not post_is_candidate_by_file_char_match_in_comment_scope and \
|
||||||
|
not post_is_candidate_by_comment_char_match: # MODIFIED: Check both file and comment match flags
|
||||||
|
self.logger(f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id}', Title '{post_title[:50]}...'")
|
||||||
|
if self.signals and hasattr(self.signals, 'missed_character_post_signal'):
|
||||||
|
self.signals.missed_character_post_signal.emit(post_title, "No character match in files or comments (Comments scope)")
|
||||||
return 0, num_potential_files_in_post, []
|
return 0, num_potential_files_in_post, []
|
||||||
|
|
||||||
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
|
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
|
||||||
post_title_lower = post_title.lower()
|
post_title_lower = post_title.lower()
|
||||||
for skip_word in self.skip_words_list:
|
for skip_word in self.skip_words_list:
|
||||||
if skip_word.lower() in post_title_lower:
|
if skip_word.lower() in post_title_lower:
|
||||||
|
# This is a skip by "skip_words_list", not by character filter.
|
||||||
|
# If you want these in the "Missed Character Log" too, you'd add a signal emit here.
|
||||||
|
# For now, sticking to the request for character filter misses.
|
||||||
self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'. Scope: {self.skip_words_scope}")
|
self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'. Scope: {self.skip_words_scope}")
|
||||||
return 0, num_potential_files_in_post, []
|
return 0, num_potential_files_in_post, []
|
||||||
|
|
||||||
if not self.extract_links_only and self.manga_mode_active and self.filter_character_list and \
|
if not self.extract_links_only and self.manga_mode_active and self.filter_character_list_objects and \
|
||||||
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and \
|
(self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and \
|
||||||
not post_is_candidate_by_title_char_match:
|
not post_is_candidate_by_title_char_match:
|
||||||
self.logger(f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title[:50]}' doesn't match filters.")
|
self.logger(f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title[:50]}' doesn't match filters.")
|
||||||
|
if self.signals and hasattr(self.signals, 'missed_character_post_signal'):
|
||||||
|
self.signals.missed_character_post_signal.emit(post_title, "Manga Mode: No title match for character filter (Title/Both scope)")
|
||||||
return 0, num_potential_files_in_post, []
|
return 0, num_potential_files_in_post, []
|
||||||
|
|
||||||
if not isinstance(post_attachments, list):
|
if not isinstance(post_attachments, list):
|
||||||
@@ -781,9 +1037,26 @@ class PostProcessorWorker:
|
|||||||
|
|
||||||
base_folder_names_for_post_content = []
|
base_folder_names_for_post_content = []
|
||||||
if not self.extract_links_only and self.use_subfolders:
|
if not self.extract_links_only and self.use_subfolders:
|
||||||
if post_is_candidate_by_title_char_match and char_filter_that_matched_title:
|
primary_char_filter_for_folder = None
|
||||||
base_folder_names_for_post_content = [clean_folder_name(char_filter_that_matched_title)]
|
log_reason_for_folder = ""
|
||||||
else:
|
|
||||||
|
if self.char_filter_scope == CHAR_SCOPE_COMMENTS and char_filter_that_matched_comment:
|
||||||
|
# For CHAR_SCOPE_COMMENTS, prioritize file match for folder name if it happened
|
||||||
|
if post_is_candidate_by_file_char_match_in_comment_scope and char_filter_that_matched_file_in_comment_scope:
|
||||||
|
primary_char_filter_for_folder = char_filter_that_matched_file_in_comment_scope
|
||||||
|
log_reason_for_folder = "Matched char filter in filename (Comments scope)"
|
||||||
|
elif post_is_candidate_by_comment_char_match and char_filter_that_matched_comment: # Fallback to comment match
|
||||||
|
primary_char_filter_for_folder = char_filter_that_matched_comment
|
||||||
|
log_reason_for_folder = "Matched char filter in comments (Comments scope, no file match)"
|
||||||
|
elif (self.char_filter_scope == CHAR_SCOPE_TITLE or self.char_filter_scope == CHAR_SCOPE_BOTH) and char_filter_that_matched_title: # Existing logic for other scopes
|
||||||
|
primary_char_filter_for_folder = char_filter_that_matched_title
|
||||||
|
log_reason_for_folder = "Matched char filter in title"
|
||||||
|
# If scope is FILES, primary_char_filter_for_folder will be None here. Folder determined per file.
|
||||||
|
|
||||||
|
if primary_char_filter_for_folder:
|
||||||
|
base_folder_names_for_post_content = [clean_folder_name(primary_char_filter_for_folder["name"])]
|
||||||
|
self.logger(f" Base folder name(s) for post content ({log_reason_for_folder}): {', '.join(base_folder_names_for_post_content)}")
|
||||||
|
elif not self.filter_character_list_objects: # No char filters defined, use generic logic
|
||||||
derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
|
derived_folders = match_folders_from_title(post_title, self.known_names, self.unwanted_keywords)
|
||||||
if derived_folders:
|
if derived_folders:
|
||||||
base_folder_names_for_post_content.extend(derived_folders)
|
base_folder_names_for_post_content.extend(derived_folders)
|
||||||
@@ -791,7 +1064,9 @@ class PostProcessorWorker:
|
|||||||
base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords))
|
base_folder_names_for_post_content.append(extract_folder_name_from_title(post_title, self.unwanted_keywords))
|
||||||
if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]:
|
if not base_folder_names_for_post_content or not base_folder_names_for_post_content[0]:
|
||||||
base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")]
|
base_folder_names_for_post_content = [clean_folder_name(post_title if post_title else "untitled_creator_content")]
|
||||||
self.logger(f" Base folder name(s) for post content (if title matched char or generic): {', '.join(base_folder_names_for_post_content)}")
|
self.logger(f" Base folder name(s) for post content (Generic title parsing - no char filters): {', '.join(base_folder_names_for_post_content)}")
|
||||||
|
# If char filters are defined, and scope is FILES, then base_folder_names_for_post_content remains empty.
|
||||||
|
# The folder will be determined by char_filter_info_that_matched_file later.
|
||||||
|
|
||||||
if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
|
if not self.extract_links_only and self.use_subfolders and self.skip_words_list:
|
||||||
for folder_name_to_check in base_folder_names_for_post_content:
|
for folder_name_to_check in base_folder_names_for_post_content:
|
||||||
@@ -907,28 +1182,62 @@ class PostProcessorWorker:
|
|||||||
current_api_original_filename = file_info_to_dl.get('_original_name_for_log')
|
current_api_original_filename = file_info_to_dl.get('_original_name_for_log')
|
||||||
|
|
||||||
file_is_candidate_by_char_filter_scope = False
|
file_is_candidate_by_char_filter_scope = False
|
||||||
char_filter_that_matched_file = None
|
char_filter_info_that_matched_file = None
|
||||||
|
|
||||||
if not self.filter_character_list:
|
if not self.filter_character_list_objects:
|
||||||
file_is_candidate_by_char_filter_scope = True
|
file_is_candidate_by_char_filter_scope = True
|
||||||
elif self.char_filter_scope == CHAR_SCOPE_FILES:
|
else:
|
||||||
for char_name in self.filter_character_list:
|
if self.char_filter_scope == CHAR_SCOPE_FILES:
|
||||||
if is_filename_match_for_character(current_api_original_filename, char_name):
|
for filter_item_obj in self.filter_character_list_objects:
|
||||||
|
terms_to_check_for_file = list(filter_item_obj["aliases"])
|
||||||
|
if filter_item_obj["is_group"] and filter_item_obj["name"] not in terms_to_check_for_file:
|
||||||
|
terms_to_check_for_file.append(filter_item_obj["name"])
|
||||||
|
unique_terms_for_file_check = list(set(terms_to_check_for_file))
|
||||||
|
|
||||||
|
for term_to_match in unique_terms_for_file_check:
|
||||||
|
if is_filename_match_for_character(current_api_original_filename, term_to_match):
|
||||||
file_is_candidate_by_char_filter_scope = True
|
file_is_candidate_by_char_filter_scope = True
|
||||||
char_filter_that_matched_file = char_name
|
char_filter_info_that_matched_file = filter_item_obj
|
||||||
|
self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). Scope: Files.")
|
||||||
break
|
break
|
||||||
|
if file_is_candidate_by_char_filter_scope: break
|
||||||
elif self.char_filter_scope == CHAR_SCOPE_TITLE:
|
elif self.char_filter_scope == CHAR_SCOPE_TITLE:
|
||||||
if post_is_candidate_by_title_char_match:
|
if post_is_candidate_by_title_char_match:
|
||||||
file_is_candidate_by_char_filter_scope = True
|
file_is_candidate_by_char_filter_scope = True
|
||||||
|
char_filter_info_that_matched_file = char_filter_that_matched_title
|
||||||
|
self.logger(f" File '{current_api_original_filename}' is candidate because post title matched. Scope: Title.")
|
||||||
elif self.char_filter_scope == CHAR_SCOPE_BOTH:
|
elif self.char_filter_scope == CHAR_SCOPE_BOTH:
|
||||||
if post_is_candidate_by_title_char_match:
|
if post_is_candidate_by_title_char_match:
|
||||||
file_is_candidate_by_char_filter_scope = True
|
file_is_candidate_by_char_filter_scope = True
|
||||||
|
char_filter_info_that_matched_file = char_filter_that_matched_title
|
||||||
|
self.logger(f" File '{current_api_original_filename}' is candidate because post title matched. Scope: Both (Title part).")
|
||||||
else:
|
else:
|
||||||
for char_name in self.filter_character_list:
|
# This part is for the "File" part of "Both" scope
|
||||||
if is_filename_match_for_character(current_api_original_filename, char_name):
|
for filter_item_obj_both_file in self.filter_character_list_objects:
|
||||||
|
terms_to_check_for_file_both = list(filter_item_obj_both_file["aliases"])
|
||||||
|
if filter_item_obj_both_file["is_group"] and filter_item_obj_both_file["name"] not in terms_to_check_for_file_both:
|
||||||
|
terms_to_check_for_file_both.append(filter_item_obj_both_file["name"])
|
||||||
|
# Ensure unique_terms_for_file_both_check is defined here
|
||||||
|
unique_terms_for_file_both_check = list(set(terms_to_check_for_file_both))
|
||||||
|
|
||||||
|
for term_to_match in unique_terms_for_file_both_check:
|
||||||
|
if is_filename_match_for_character(current_api_original_filename, term_to_match):
|
||||||
file_is_candidate_by_char_filter_scope = True
|
file_is_candidate_by_char_filter_scope = True
|
||||||
char_filter_that_matched_file = char_name
|
char_filter_info_that_matched_file = filter_item_obj_both_file # Use the filter that matched the file
|
||||||
|
self.logger(f" File '{current_api_original_filename}' matches char filter term '{term_to_match}' (from '{filter_item_obj['name']}'). Scope: Both (File part).")
|
||||||
break
|
break
|
||||||
|
if file_is_candidate_by_char_filter_scope: break
|
||||||
|
elif self.char_filter_scope == CHAR_SCOPE_COMMENTS:
|
||||||
|
# If the post is a candidate (either by file or comment under this scope), then this file is also a candidate.
|
||||||
|
# The folder naming will use the filter that made the POST a candidate.
|
||||||
|
if post_is_candidate_by_file_char_match_in_comment_scope: # Post was candidate due to a file match
|
||||||
|
file_is_candidate_by_char_filter_scope = True
|
||||||
|
char_filter_info_that_matched_file = char_filter_that_matched_file_in_comment_scope # Use the filter that matched a file in the post
|
||||||
|
self.logger(f" File '{current_api_original_filename}' is candidate because a file in this post matched char filter (Overall Scope: Comments).")
|
||||||
|
elif post_is_candidate_by_comment_char_match: # Post was candidate due to comment match (no file match for post)
|
||||||
|
file_is_candidate_by_char_filter_scope = True
|
||||||
|
char_filter_info_that_matched_file = char_filter_that_matched_comment # Use the filter that matched comments
|
||||||
|
self.logger(f" File '{current_api_original_filename}' is candidate because post comments matched char filter (Overall Scope: Comments).")
|
||||||
|
|
||||||
if not file_is_candidate_by_char_filter_scope:
|
if not file_is_candidate_by_char_filter_scope:
|
||||||
self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.")
|
self.logger(f" -> Skip File (Char Filter Scope '{self.char_filter_scope}'): '{current_api_original_filename}' no match.")
|
||||||
@@ -941,10 +1250,10 @@ class PostProcessorWorker:
|
|||||||
char_title_subfolder_name = None
|
char_title_subfolder_name = None
|
||||||
if self.target_post_id_from_initial_url and self.custom_folder_name:
|
if self.target_post_id_from_initial_url and self.custom_folder_name:
|
||||||
char_title_subfolder_name = self.custom_folder_name
|
char_title_subfolder_name = self.custom_folder_name
|
||||||
|
elif char_filter_info_that_matched_file:
|
||||||
|
char_title_subfolder_name = clean_folder_name(char_filter_info_that_matched_file["name"])
|
||||||
elif char_filter_that_matched_title:
|
elif char_filter_that_matched_title:
|
||||||
char_title_subfolder_name = clean_folder_name(char_filter_that_matched_title)
|
char_title_subfolder_name = clean_folder_name(char_filter_that_matched_title["name"])
|
||||||
elif char_filter_that_matched_file:
|
|
||||||
char_title_subfolder_name = clean_folder_name(char_filter_that_matched_file)
|
|
||||||
elif base_folder_names_for_post_content:
|
elif base_folder_names_for_post_content:
|
||||||
char_title_subfolder_name = base_folder_names_for_post_content[0]
|
char_title_subfolder_name = base_folder_names_for_post_content[0]
|
||||||
|
|
||||||
@@ -953,7 +1262,7 @@ class PostProcessorWorker:
|
|||||||
|
|
||||||
if self.use_post_subfolders:
|
if self.use_post_subfolders:
|
||||||
cleaned_title_for_subfolder = clean_folder_name(post_title)
|
cleaned_title_for_subfolder = clean_folder_name(post_title)
|
||||||
post_specific_subfolder_name = f"{post_id}_{cleaned_title_for_subfolder}" if cleaned_title_for_subfolder else f"{post_id}_untitled"
|
post_specific_subfolder_name = cleaned_title_for_subfolder # Use only the cleaned title
|
||||||
current_path_for_file = os.path.join(current_path_for_file, post_specific_subfolder_name)
|
current_path_for_file = os.path.join(current_path_for_file, post_specific_subfolder_name)
|
||||||
|
|
||||||
target_folder_path_for_this_file = current_path_for_file
|
target_folder_path_for_this_file = current_path_for_file
|
||||||
@@ -990,7 +1299,7 @@ class PostProcessorWorker:
|
|||||||
total_skipped_this_post += 1
|
total_skipped_this_post += 1
|
||||||
|
|
||||||
if self.signals and hasattr(self.signals, 'file_progress_signal'):
|
if self.signals and hasattr(self.signals, 'file_progress_signal'):
|
||||||
self.signals.file_progress_signal.emit("", 0, 0)
|
self.signals.file_progress_signal.emit("", None)
|
||||||
|
|
||||||
if self.check_cancel(): self.logger(f" Post {post_id} processing interrupted/cancelled.");
|
if self.check_cancel(): self.logger(f" Post {post_id} processing interrupted/cancelled.");
|
||||||
else: self.logger(f" Post {post_id} Summary: Downloaded={total_downloaded_this_post}, Skipped Files={total_skipped_this_post}")
|
else: self.logger(f" Post {post_id} Summary: Downloaded={total_downloaded_this_post}, Skipped Files={total_skipped_this_post}")
|
||||||
@@ -1004,7 +1313,8 @@ class DownloadThread(QThread):
|
|||||||
file_download_status_signal = pyqtSignal(bool)
|
file_download_status_signal = pyqtSignal(bool)
|
||||||
finished_signal = pyqtSignal(int, int, bool, list)
|
finished_signal = pyqtSignal(int, int, bool, list)
|
||||||
external_link_signal = pyqtSignal(str, str, str, str)
|
external_link_signal = pyqtSignal(str, str, str, str)
|
||||||
file_progress_signal = pyqtSignal(str, int, int)
|
file_progress_signal = pyqtSignal(str, object)
|
||||||
|
missed_character_post_signal = pyqtSignal(str, str) # New: post_title, reason
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, api_url_input, output_dir, known_names_copy,
|
def __init__(self, api_url_input, output_dir, known_names_copy,
|
||||||
@@ -1025,8 +1335,10 @@ class DownloadThread(QThread):
|
|||||||
manga_mode_active=False,
|
manga_mode_active=False,
|
||||||
unwanted_keywords=None,
|
unwanted_keywords=None,
|
||||||
manga_filename_style=STYLE_POST_TITLE,
|
manga_filename_style=STYLE_POST_TITLE,
|
||||||
char_filter_scope=CHAR_SCOPE_FILES
|
char_filter_scope=CHAR_SCOPE_FILES,
|
||||||
):
|
remove_from_filename_words_list=None,
|
||||||
|
allow_multipart_download=True,
|
||||||
|
): # Removed duplicate_file_mode and session-wide tracking
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.api_url_input = api_url_input
|
self.api_url_input = api_url_input
|
||||||
self.output_dir = output_dir
|
self.output_dir = output_dir
|
||||||
@@ -1034,7 +1346,7 @@ class DownloadThread(QThread):
|
|||||||
self.cancellation_event = cancellation_event
|
self.cancellation_event = cancellation_event
|
||||||
self.skip_current_file_flag = skip_current_file_flag
|
self.skip_current_file_flag = skip_current_file_flag
|
||||||
self.initial_target_post_id = target_post_id_from_initial_url
|
self.initial_target_post_id = target_post_id_from_initial_url
|
||||||
self.filter_character_list = filter_character_list if filter_character_list else []
|
self.filter_character_list_objects = filter_character_list if filter_character_list else []
|
||||||
self.filter_mode = filter_mode
|
self.filter_mode = filter_mode
|
||||||
self.skip_zip = skip_zip
|
self.skip_zip = skip_zip
|
||||||
self.skip_rar = skip_rar
|
self.skip_rar = skip_rar
|
||||||
@@ -1065,7 +1377,9 @@ class DownloadThread(QThread):
|
|||||||
{'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'}
|
{'spicy', 'hd', 'nsfw', '4k', 'preview', 'teaser', 'clip'}
|
||||||
self.manga_filename_style = manga_filename_style
|
self.manga_filename_style = manga_filename_style
|
||||||
self.char_filter_scope = char_filter_scope
|
self.char_filter_scope = char_filter_scope
|
||||||
|
self.remove_from_filename_words_list = remove_from_filename_words_list
|
||||||
|
self.allow_multipart_download = allow_multipart_download
|
||||||
|
# self.duplicate_file_mode and session-wide tracking removed
|
||||||
if self.compress_images and Image is None:
|
if self.compress_images and Image is None:
|
||||||
self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
|
self.logger("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
|
||||||
self.compress_images = False
|
self.compress_images = False
|
||||||
@@ -1096,6 +1410,7 @@ class DownloadThread(QThread):
|
|||||||
worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal)
|
worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal)
|
||||||
worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
|
worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
|
||||||
worker_signals_obj.external_link_signal.connect(self.external_link_signal)
|
worker_signals_obj.external_link_signal.connect(self.external_link_signal)
|
||||||
|
worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal) # New connection
|
||||||
|
|
||||||
self.logger(" Starting post fetch (single-threaded download process)...")
|
self.logger(" Starting post fetch (single-threaded download process)...")
|
||||||
post_generator = download_from_api(
|
post_generator = download_from_api(
|
||||||
@@ -1116,7 +1431,7 @@ class DownloadThread(QThread):
|
|||||||
post_data=individual_post_data,
|
post_data=individual_post_data,
|
||||||
download_root=self.output_dir,
|
download_root=self.output_dir,
|
||||||
known_names=self.known_names,
|
known_names=self.known_names,
|
||||||
filter_character_list=self.filter_character_list,
|
filter_character_list=self.filter_character_list_objects,
|
||||||
unwanted_keywords=self.unwanted_keywords,
|
unwanted_keywords=self.unwanted_keywords,
|
||||||
filter_mode=self.filter_mode,
|
filter_mode=self.filter_mode,
|
||||||
skip_zip=self.skip_zip, skip_rar=self.skip_rar,
|
skip_zip=self.skip_zip, skip_rar=self.skip_rar,
|
||||||
@@ -1140,8 +1455,10 @@ class DownloadThread(QThread):
|
|||||||
skip_current_file_flag=self.skip_current_file_flag,
|
skip_current_file_flag=self.skip_current_file_flag,
|
||||||
manga_mode_active=self.manga_mode_active,
|
manga_mode_active=self.manga_mode_active,
|
||||||
manga_filename_style=self.manga_filename_style,
|
manga_filename_style=self.manga_filename_style,
|
||||||
char_filter_scope=self.char_filter_scope
|
char_filter_scope=self.char_filter_scope,
|
||||||
)
|
remove_from_filename_words_list=self.remove_from_filename_words_list,
|
||||||
|
allow_multipart_download=self.allow_multipart_download,
|
||||||
|
) # Removed duplicate_file_mode and session-wide tracking
|
||||||
try:
|
try:
|
||||||
dl_count, skip_count, kept_originals_this_post = post_processing_worker.process()
|
dl_count, skip_count, kept_originals_this_post = post_processing_worker.process()
|
||||||
grand_total_downloaded_files += dl_count
|
grand_total_downloaded_files += dl_count
|
||||||
@@ -1177,6 +1494,7 @@ class DownloadThread(QThread):
|
|||||||
worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal)
|
worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal)
|
||||||
worker_signals_obj.external_link_signal.disconnect(self.external_link_signal)
|
worker_signals_obj.external_link_signal.disconnect(self.external_link_signal)
|
||||||
worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal)
|
worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal)
|
||||||
|
worker_signals_obj.missed_character_post_signal.disconnect(self.missed_character_post_signal) # New disconnection
|
||||||
except (TypeError, RuntimeError) as e:
|
except (TypeError, RuntimeError) as e:
|
||||||
self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}")
|
self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}")
|
||||||
|
|
||||||
|
|||||||
232
multipart_downloader.py
Normal file
232
multipart_downloader.py
Normal file
@@ -0,0 +1,232 @@
|
|||||||
|
import os
|
||||||
|
import time
|
||||||
|
import requests
|
||||||
|
import hashlib
|
||||||
|
import http.client
|
||||||
|
import traceback
|
||||||
|
import threading
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
|
||||||
|
CHUNK_DOWNLOAD_RETRY_DELAY = 2 # Slightly reduced for faster retries if needed
|
||||||
|
MAX_CHUNK_DOWNLOAD_RETRIES = 1 # Further reduced for quicker fallback if a chunk is problematic
|
||||||
|
DOWNLOAD_CHUNK_SIZE_ITER = 1024 * 256 # 256KB for iter_content within a chunk download
|
||||||
|
|
||||||
|
|
||||||
|
def _download_individual_chunk(chunk_url, temp_file_path, start_byte, end_byte, headers,
|
||||||
|
part_num, total_parts, progress_data, cancellation_event, skip_event, logger,
|
||||||
|
signals=None, api_original_filename=None): # Added signals and api_original_filename
|
||||||
|
"""Downloads a single chunk of a file and writes it to the temp file."""
|
||||||
|
if cancellation_event and cancellation_event.is_set():
|
||||||
|
logger(f" [Chunk {part_num + 1}/{total_parts}] Download cancelled before start.")
|
||||||
|
return 0, False # bytes_downloaded, success
|
||||||
|
if skip_event and skip_event.is_set():
|
||||||
|
logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event triggered before start.")
|
||||||
|
return 0, False
|
||||||
|
|
||||||
|
chunk_headers = headers.copy()
|
||||||
|
# end_byte can be -1 for 0-byte files, meaning download from start_byte to end of file (which is start_byte itself)
|
||||||
|
if end_byte != -1 : # For 0-byte files, end_byte might be -1, Range header should not be set or be 0-0
|
||||||
|
chunk_headers['Range'] = f"bytes={start_byte}-{end_byte}"
|
||||||
|
elif start_byte == 0 and end_byte == -1: # Specifically for 0-byte files
|
||||||
|
# Some servers might not like Range: bytes=0--1.
|
||||||
|
# For a 0-byte file, we might not even need a range header, or Range: bytes=0-0
|
||||||
|
# Let's try without for 0-byte, or rely on server to handle 0-0 if Content-Length was 0.
|
||||||
|
# If Content-Length was 0, the main function might handle it directly.
|
||||||
|
# This chunking logic is primarily for files > 0 bytes.
|
||||||
|
# For now, if end_byte is -1, it implies a 0-byte file, so we expect 0 bytes.
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
bytes_this_chunk = 0
|
||||||
|
last_progress_emit_time_for_chunk = time.time()
|
||||||
|
last_speed_calc_time = time.time()
|
||||||
|
bytes_at_last_speed_calc = 0
|
||||||
|
|
||||||
|
for attempt in range(MAX_CHUNK_DOWNLOAD_RETRIES + 1):
|
||||||
|
if cancellation_event and cancellation_event.is_set():
|
||||||
|
logger(f" [Chunk {part_num + 1}/{total_parts}] Cancelled during retry loop.")
|
||||||
|
return bytes_this_chunk, False
|
||||||
|
if skip_event and skip_event.is_set():
|
||||||
|
logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event during retry loop.")
|
||||||
|
return bytes_this_chunk, False
|
||||||
|
|
||||||
|
try:
|
||||||
|
if attempt > 0:
|
||||||
|
logger(f" [Chunk {part_num + 1}/{total_parts}] Retrying download (Attempt {attempt}/{MAX_CHUNK_DOWNLOAD_RETRIES})...")
|
||||||
|
time.sleep(CHUNK_DOWNLOAD_RETRY_DELAY * (2 ** (attempt - 1)))
|
||||||
|
# Reset speed calculation on retry
|
||||||
|
last_speed_calc_time = time.time()
|
||||||
|
bytes_at_last_speed_calc = bytes_this_chunk # Current progress of this chunk
|
||||||
|
|
||||||
|
# Enhanced log message for chunk start
|
||||||
|
log_msg = f" 🚀 [Chunk {part_num + 1}/{total_parts}] Starting download: bytes {start_byte}-{end_byte if end_byte != -1 else 'EOF'}"
|
||||||
|
logger(log_msg)
|
||||||
|
print(f"DEBUG_MULTIPART: {log_msg}") # Direct console print for debugging
|
||||||
|
response = requests.get(chunk_url, headers=chunk_headers, timeout=(10, 120), stream=True)
|
||||||
|
response.raise_for_status()
|
||||||
|
|
||||||
|
# For 0-byte files, if end_byte was -1, we expect 0 content.
|
||||||
|
if start_byte == 0 and end_byte == -1 and int(response.headers.get('Content-Length', 0)) == 0:
|
||||||
|
logger(f" [Chunk {part_num + 1}/{total_parts}] Confirmed 0-byte file.")
|
||||||
|
with progress_data['lock']:
|
||||||
|
progress_data['chunks_status'][part_num]['active'] = False
|
||||||
|
progress_data['chunks_status'][part_num]['speed_bps'] = 0
|
||||||
|
return 0, True
|
||||||
|
|
||||||
|
with open(temp_file_path, 'r+b') as f: # Open in read-write binary
|
||||||
|
f.seek(start_byte)
|
||||||
|
for data_segment in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE_ITER):
|
||||||
|
if cancellation_event and cancellation_event.is_set():
|
||||||
|
logger(f" [Chunk {part_num + 1}/{total_parts}] Cancelled during data iteration.")
|
||||||
|
return bytes_this_chunk, False
|
||||||
|
if skip_event and skip_event.is_set():
|
||||||
|
logger(f" [Chunk {part_num + 1}/{total_parts}] Skip event during data iteration.")
|
||||||
|
return bytes_this_chunk, False
|
||||||
|
if data_segment:
|
||||||
|
f.write(data_segment)
|
||||||
|
bytes_this_chunk += len(data_segment)
|
||||||
|
|
||||||
|
with progress_data['lock']:
|
||||||
|
# Increment both the chunk's downloaded and the overall downloaded
|
||||||
|
progress_data['total_downloaded_so_far'] += len(data_segment)
|
||||||
|
progress_data['chunks_status'][part_num]['downloaded'] = bytes_this_chunk
|
||||||
|
progress_data['chunks_status'][part_num]['active'] = True
|
||||||
|
|
||||||
|
current_time = time.time()
|
||||||
|
time_delta_speed = current_time - last_speed_calc_time
|
||||||
|
if time_delta_speed > 0.5: # Calculate speed every 0.5 seconds
|
||||||
|
bytes_delta = bytes_this_chunk - bytes_at_last_speed_calc
|
||||||
|
current_speed_bps = (bytes_delta * 8) / time_delta_speed if time_delta_speed > 0 else 0
|
||||||
|
progress_data['chunks_status'][part_num]['speed_bps'] = current_speed_bps
|
||||||
|
last_speed_calc_time = current_time
|
||||||
|
bytes_at_last_speed_calc = bytes_this_chunk
|
||||||
|
|
||||||
|
# Emit progress more frequently from within the chunk download
|
||||||
|
if current_time - last_progress_emit_time_for_chunk > 0.1: # Emit up to 10 times/sec per chunk
|
||||||
|
if signals and hasattr(signals, 'file_progress_signal'):
|
||||||
|
# Ensure we read the latest total downloaded from progress_data
|
||||||
|
# Send a copy of the chunks_status list
|
||||||
|
status_list_copy = [dict(s) for s in progress_data['chunks_status']] # Make a deep enough copy
|
||||||
|
signals.file_progress_signal.emit(api_original_filename, status_list_copy)
|
||||||
|
last_progress_emit_time_for_chunk = current_time
|
||||||
|
return bytes_this_chunk, True
|
||||||
|
|
||||||
|
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, http.client.IncompleteRead) as e:
|
||||||
|
logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Retryable error: {e}")
|
||||||
|
if attempt == MAX_CHUNK_DOWNLOAD_RETRIES:
|
||||||
|
logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Failed after {MAX_CHUNK_DOWNLOAD_RETRIES} retries.")
|
||||||
|
return bytes_this_chunk, False
|
||||||
|
except requests.exceptions.RequestException as e: # Includes 4xx/5xx errors after raise_for_status
|
||||||
|
logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Non-retryable error: {e}")
|
||||||
|
return bytes_this_chunk, False
|
||||||
|
except Exception as e:
|
||||||
|
logger(f" ❌ [Chunk {part_num + 1}/{total_parts}] Unexpected error: {e}\n{traceback.format_exc(limit=1)}")
|
||||||
|
return bytes_this_chunk, False
|
||||||
|
|
||||||
|
# Ensure final status is marked as inactive if loop finishes due to retries
|
||||||
|
with progress_data['lock']:
|
||||||
|
progress_data['chunks_status'][part_num]['active'] = False
|
||||||
|
progress_data['chunks_status'][part_num]['speed_bps'] = 0
|
||||||
|
return bytes_this_chunk, False # Should be unreachable
|
||||||
|
|
||||||
|
|
||||||
|
def download_file_in_parts(file_url, save_path, total_size, num_parts, headers,
|
||||||
|
api_original_filename, signals, cancellation_event, skip_event, logger):
|
||||||
|
"""
|
||||||
|
Downloads a file in multiple parts concurrently.
|
||||||
|
Returns: (download_successful_flag, downloaded_bytes, calculated_file_hash, temp_file_handle_or_None)
|
||||||
|
The temp_file_handle will be an open read-binary file handle to the .part file if successful, otherwise None.
|
||||||
|
It is the responsibility of the caller to close this handle and rename/delete the .part file.
|
||||||
|
"""
|
||||||
|
logger(f"⬇️ Initializing Multi-part Download ({num_parts} parts) for: '{api_original_filename}' (Size: {total_size / (1024*1024):.2f} MB)")
|
||||||
|
temp_file_path = save_path + ".part"
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(temp_file_path, 'wb') as f_temp:
|
||||||
|
if total_size > 0:
|
||||||
|
f_temp.truncate(total_size) # Pre-allocate space
|
||||||
|
except IOError as e:
|
||||||
|
logger(f" ❌ Error creating/truncating temp file '{temp_file_path}': {e}")
|
||||||
|
return False, 0, None, None
|
||||||
|
|
||||||
|
chunk_size_calc = total_size // num_parts
|
||||||
|
chunks_ranges = []
|
||||||
|
for i in range(num_parts):
|
||||||
|
start = i * chunk_size_calc
|
||||||
|
end = start + chunk_size_calc - 1 if i < num_parts - 1 else total_size - 1
|
||||||
|
if start <= end: # Valid range
|
||||||
|
chunks_ranges.append((start, end))
|
||||||
|
elif total_size == 0 and i == 0: # Special case for 0-byte file
|
||||||
|
chunks_ranges.append((0, -1)) # Indicates 0-byte file, download 0 bytes from offset 0
|
||||||
|
|
||||||
|
chunk_actual_sizes = []
|
||||||
|
for start, end in chunks_ranges:
|
||||||
|
if end == -1 and start == 0: # 0-byte file
|
||||||
|
chunk_actual_sizes.append(0)
|
||||||
|
else:
|
||||||
|
chunk_actual_sizes.append(end - start + 1)
|
||||||
|
|
||||||
|
if not chunks_ranges and total_size > 0:
|
||||||
|
logger(f" ⚠️ No valid chunk ranges for multipart download of '{api_original_filename}'. Aborting multipart.")
|
||||||
|
if os.path.exists(temp_file_path): os.remove(temp_file_path)
|
||||||
|
return False, 0, None, None
|
||||||
|
|
||||||
|
progress_data = {
|
||||||
|
'total_file_size': total_size, # Overall file size for reference
|
||||||
|
'total_downloaded_so_far': 0, # New key for overall progress
|
||||||
|
'chunks_status': [ # Status for each chunk
|
||||||
|
{'id': i, 'downloaded': 0, 'total': chunk_actual_sizes[i] if i < len(chunk_actual_sizes) else 0, 'active': False, 'speed_bps': 0.0}
|
||||||
|
for i in range(num_parts)
|
||||||
|
],
|
||||||
|
'lock': threading.Lock()
|
||||||
|
}
|
||||||
|
|
||||||
|
chunk_futures = []
|
||||||
|
all_chunks_successful = True
|
||||||
|
total_bytes_from_chunks = 0 # Still useful to verify total downloaded against file size
|
||||||
|
|
||||||
|
with ThreadPoolExecutor(max_workers=num_parts, thread_name_prefix=f"MPChunk_{api_original_filename[:10]}_") as chunk_pool:
|
||||||
|
for i, (start, end) in enumerate(chunks_ranges):
|
||||||
|
if cancellation_event and cancellation_event.is_set(): all_chunks_successful = False; break
|
||||||
|
chunk_futures.append(chunk_pool.submit(
|
||||||
|
_download_individual_chunk, chunk_url=file_url, temp_file_path=temp_file_path,
|
||||||
|
start_byte=start, end_byte=end, headers=headers, part_num=i, total_parts=num_parts,
|
||||||
|
progress_data=progress_data, cancellation_event=cancellation_event, skip_event=skip_event, logger=logger,
|
||||||
|
signals=signals, api_original_filename=api_original_filename # Pass them here
|
||||||
|
))
|
||||||
|
|
||||||
|
for future in as_completed(chunk_futures):
|
||||||
|
if cancellation_event and cancellation_event.is_set(): all_chunks_successful = False; break
|
||||||
|
bytes_downloaded_this_chunk, success_this_chunk = future.result()
|
||||||
|
total_bytes_from_chunks += bytes_downloaded_this_chunk
|
||||||
|
if not success_this_chunk:
|
||||||
|
all_chunks_successful = False
|
||||||
|
# Progress is emitted from within _download_individual_chunk
|
||||||
|
|
||||||
|
if cancellation_event and cancellation_event.is_set():
|
||||||
|
logger(f" Multi-part download for '{api_original_filename}' cancelled by main event.")
|
||||||
|
all_chunks_successful = False
|
||||||
|
|
||||||
|
# Ensure a final progress update is sent with all chunks marked inactive (unless still active due to error)
|
||||||
|
if signals and hasattr(signals, 'file_progress_signal'):
|
||||||
|
with progress_data['lock']:
|
||||||
|
# Ensure all chunks are marked inactive for the final signal if download didn't fully succeed or was cancelled
|
||||||
|
status_list_copy = [dict(s) for s in progress_data['chunks_status']]
|
||||||
|
signals.file_progress_signal.emit(api_original_filename, status_list_copy)
|
||||||
|
|
||||||
|
if all_chunks_successful and (total_bytes_from_chunks == total_size or total_size == 0):
|
||||||
|
logger(f" ✅ Multi-part download successful for '{api_original_filename}'. Total bytes: {total_bytes_from_chunks}")
|
||||||
|
md5_hasher = hashlib.md5()
|
||||||
|
with open(temp_file_path, 'rb') as f_hash:
|
||||||
|
for buf in iter(lambda: f_hash.read(4096*10), b''): # Read in larger buffers for hashing
|
||||||
|
md5_hasher.update(buf)
|
||||||
|
calculated_hash = md5_hasher.hexdigest()
|
||||||
|
# Return an open file handle for the caller to manage (e.g., for compression)
|
||||||
|
# The caller is responsible for closing this handle and renaming/deleting the .part file.
|
||||||
|
return True, total_bytes_from_chunks, calculated_hash, open(temp_file_path, 'rb')
|
||||||
|
else:
|
||||||
|
logger(f" ❌ Multi-part download failed for '{api_original_filename}'. Success: {all_chunks_successful}, Bytes: {total_bytes_from_chunks}/{total_size}. Cleaning up.")
|
||||||
|
if os.path.exists(temp_file_path):
|
||||||
|
try: os.remove(temp_file_path)
|
||||||
|
except OSError as e: logger(f" Failed to remove temp part file '{temp_file_path}': {e}")
|
||||||
|
return False, total_bytes_from_chunks, None, None
|
||||||
223
readme.md
223
readme.md
@@ -1,99 +1,204 @@
|
|||||||
# Kemono Downloader v3.1.0
|
# Kemono Downloader v3.2.0
|
||||||
|
|
||||||
A feature-rich GUI application built with PyQt5 to download content from [Kemono.su](https://kemono.su) or [Coomer.party](https://coomer.party). Offers robust filtering, smart organization, manga-specific handling, and performance tuning. Now with session resuming, better retry logic, and smarter file management.
|
A feature-rich GUI application built with PyQt5 to download content from **Kemono.su** or **Coomer.party**.
|
||||||
|
Offers robust filtering, smart organization, manga-specific handling, and performance tuning.
|
||||||
|
|
||||||
|
This version introduces:
|
||||||
|
- Multi-part downloads
|
||||||
|
- Character filtering by comments
|
||||||
|
- Filename word removal
|
||||||
|
- Various UI/workflow enhancements
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🚀 What's New in v3.1.0
|
## 🚀 What's New in v3.2.0
|
||||||
|
|
||||||
* **Session Resuming**
|
### 🔹 Character Filter by Post Comments (Beta)
|
||||||
* Automatically saves and resumes incomplete downloads.
|
|
||||||
|
|
||||||
* **Retry on Failure**
|
- New "Comments" scope for the 'Filter by Character(s)' feature.
|
||||||
* Failed files auto-retry up to 3 times.
|
|
||||||
* Clear logging for each retry attempt.
|
|
||||||
|
|
||||||
* **Improved Manga Mode**
|
**How it works:**
|
||||||
* Better post ordering and handling of missing or untitled posts.
|
1. Checks if any **filenames** match your character filter. If yes → downloads the post (skips comment check).
|
||||||
* Optional numeric-only sorting for consistent naming.
|
2. If no filename matches → scans the **post's comments**. If matched → downloads the post.
|
||||||
|
|
||||||
* **UI Enhancements**
|
- Prioritizes filename-matched character name for folder naming, otherwise uses comment match.
|
||||||
* Settings persist across sessions.
|
- Cycle through filter scopes with the `Filter: [Scope]` button next to the character input.
|
||||||
* Improved layout spacing, tooltips, and status indicators.
|
|
||||||
|
|
||||||
* **Stability & Speed**
|
---
|
||||||
* Faster post fetching with lower memory usage.
|
|
||||||
* Minor bug fixes (duplicate folders, empty post crashes).
|
### ✂️ Remove Specific Words from Filenames
|
||||||
|
|
||||||
|
- Input field: `"✂️ Remove Words from name"`
|
||||||
|
- Enter comma-separated words (e.g., `patreon, kemono, [HD], _final`)
|
||||||
|
- These are removed from filenames (case-insensitive) to improve organization.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 🧩 Multi-part Downloads for Large Files
|
||||||
|
|
||||||
|
- Toggle multi-part downloads (OFF by default).
|
||||||
|
- Improves speed on large files (e.g., >10MB videos, zips).
|
||||||
|
- Falls back to single-stream on failure.
|
||||||
|
- Toggle via `Multi-part: ON/OFF` in the log header.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 🧠 UI and Workflow Enhancements
|
||||||
|
|
||||||
|
- **Updated Welcome Tour**
|
||||||
|
Shows on first launch, covers all new and core features.
|
||||||
|
|
||||||
|
- **Smarter Cancel/Reset**
|
||||||
|
Cancels active tasks and resets UI — but retains URL and Download Directory fields.
|
||||||
|
|
||||||
|
- **Simplified Interface**
|
||||||
|
- Removed "Skip Current File" and local API server for a cleaner experience.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 📁 Refined File & Duplicate Handling
|
||||||
|
|
||||||
|
- **Duplicate Filenames**
|
||||||
|
Adds numeric suffix (`file.jpg`, `file_1.jpg`, etc.).
|
||||||
|
Removed the "Duplicate" subfolder system.
|
||||||
|
|
||||||
|
- **Efficient Hash Check**
|
||||||
|
Detects and skips duplicate files within the same session (before writing to disk).
|
||||||
|
|
||||||
|
- **Better Temp File Cleanup**
|
||||||
|
Cleans up `.part` files — especially if duplicate or compressed post-download.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🧩 Core Features
|
## 🧩 Core Features
|
||||||
|
|
||||||
* **Simple GUI**
|
### 🎛 Simple GUI
|
||||||
Built with PyQt5 for a clean, responsive experience.
|
- Built with **PyQt5**
|
||||||
|
- Dark theme, responsive layout
|
||||||
|
|
||||||
* **Supports Both Post and Creator URLs**
|
### 📥 Supports Post and Creator URLs
|
||||||
Download a single post or an entire feed with one click.
|
- Download a single post or an entire creator’s feed.
|
||||||
|
|
||||||
* **Smart Folder System**
|
### 🔢 Page Range Support
|
||||||
Organize files using post titles, known character/show names, or a folder per post.
|
- Choose page range when downloading creator feeds (except in Manga Mode).
|
||||||
Detects and auto-names folders based on custom keywords.
|
|
||||||
|
|
||||||
* **Known Names Manager**
|
---
|
||||||
Add, search, and delete tags for smarter organization.
|
|
||||||
Saved to `Known.txt` for reuse.
|
|
||||||
|
|
||||||
* **Advanced Filters**
|
### 🗂 Smart Folder System
|
||||||
* Skip posts or files with specific keywords (e.g. `WIP`, `sketch`).
|
|
||||||
* Filter by media type: images, videos, or GIFs.
|
|
||||||
* Skip `.zip` and `.rar` archives.
|
|
||||||
|
|
||||||
* **Manga Mode**
|
- Organize by character names, post titles, or custom labels.
|
||||||
Rename and sort manga posts by title and upload order.
|
- Option to create a separate folder for each post.
|
||||||
Handles one-image-per-post formats cleanly.
|
- Uses `Known.txt` for fallback names.
|
||||||
|
|
||||||
* **Image Compression**
|
---
|
||||||
Auto-convert large images (>1.5MB) to WebP (requires Pillow).
|
|
||||||
|
|
||||||
* **Multithreaded Downloads**
|
### 📚 Known Names Manager
|
||||||
Adjustable worker count with warnings at unsafe levels.
|
|
||||||
Full threading for creators, single-thread fallback for post mode.
|
|
||||||
|
|
||||||
* **Download Controls**
|
- Add/edit/delete known characters/shows
|
||||||
Cancel files mid-download.
|
- Saves entries in `Known.txt` for automatic folder naming.
|
||||||
Visual progress tracking with per-post summaries.
|
|
||||||
|
|
||||||
* **Dark Mode**
|
---
|
||||||
Clean and modern dark-themed interface.
|
|
||||||
|
### 🔍 Advanced Filtering
|
||||||
|
|
||||||
|
- **Filter by Character(s)**
|
||||||
|
Scope: `Files`, `Post Titles`, `Both`, or `Post Comments (Beta)`
|
||||||
|
|
||||||
|
- **Skip with Words**
|
||||||
|
Skip posts or files based on keywords. Toggle scope.
|
||||||
|
|
||||||
|
- **Media Type Filters**
|
||||||
|
Choose: `All`, `Images/GIFs`, `Videos`, `📦 Only Archives (.zip/.rar)`
|
||||||
|
|
||||||
|
- **🔗 Only Links Mode**
|
||||||
|
Extracts links from post descriptions.
|
||||||
|
|
||||||
|
- **Skip Archives**
|
||||||
|
Ignore `.zip`/`.rar` unless in "Only Archives" mode.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 📖 Manga/Comic Mode (Creator URLs Only)
|
||||||
|
|
||||||
|
- Downloads posts oldest-to-newest.
|
||||||
|
|
||||||
|
**Filename Style Toggle:**
|
||||||
|
- `Post Title` (default): Names first file in post after title.
|
||||||
|
- `Original File`: Uses original file names.
|
||||||
|
|
||||||
|
- Uses manga/series title for filtering and folder naming.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 🖼️ Image Compression
|
||||||
|
|
||||||
|
- Converts large images to **WebP** if it significantly reduces size.
|
||||||
|
- Requires `Pillow` library.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 🖼 Download Thumbnails Only
|
||||||
|
|
||||||
|
- Option to fetch only small preview images.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### ⚙️ Multithreaded Downloads
|
||||||
|
|
||||||
|
- Adjustable threads for:
|
||||||
|
- Multiple post processing (creator feeds)
|
||||||
|
- File-level concurrency (within a post)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### ⏯ Download Controls
|
||||||
|
|
||||||
|
- Start and cancel active operations.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 🌙 Dark Mode Interface
|
||||||
|
|
||||||
|
- Modern, dark-themed GUI for comfort and clarity.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🔧 Backend Enhancements
|
## 🔧 Backend Enhancements
|
||||||
|
|
||||||
* **Retry Logic**
|
### ♻️ Retry Logic
|
||||||
Auto-retries individual failed files before skipping.
|
|
||||||
Logs all failures with HTTP codes and reasons.
|
|
||||||
|
|
||||||
* **Hash-Based Deduplication**
|
- Retries failed file and chunk downloads before skipping.
|
||||||
Prevents redownloading of previously saved files.
|
|
||||||
|
|
||||||
* **Smart Naming**
|
---
|
||||||
Cleans and standardizes inconsistent post titles.
|
|
||||||
Adds page indices for manga.
|
|
||||||
|
|
||||||
* **Efficient Logging**
|
### 🧬 Session-wide Deduplication
|
||||||
Toggle between basic and advanced views.
|
|
||||||
Live feedback with color-coded logs.
|
- Uses **MD5 hashes** to avoid saving identical files during a session.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 🧹 Smart Naming & Cleanup
|
||||||
|
|
||||||
|
- Cleans special characters in names.
|
||||||
|
- Applies numeric suffixes on collision.
|
||||||
|
- Removes specified unwanted words.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 📋 Efficient Logging
|
||||||
|
|
||||||
|
- Toggle verbosity: `Basic` (important) or `Full` (everything).
|
||||||
|
- Separate panel for extracted external links.
|
||||||
|
- Real-time feedback with clear statuses.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 📦 Installation
|
## 📦 Installation
|
||||||
|
|
||||||
### Requirements
|
### Requirements
|
||||||
|
- Python 3.6+
|
||||||
|
- Pip (Python package manager)
|
||||||
|
|
||||||
* Python 3.6+
|
### Install Libraries
|
||||||
* Pip packages:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install PyQt5 requests Pillow
|
pip install PyQt5 requests Pillow
|
||||||
|
|
||||||
|
|||||||
325
tour.py
325
tour.py
@@ -1,325 +0,0 @@
|
|||||||
import sys
|
|
||||||
import traceback # Added for enhanced error reporting
|
|
||||||
from PyQt5.QtWidgets import (
|
|
||||||
QApplication, QDialog, QWidget, QLabel, QPushButton, QVBoxLayout, QHBoxLayout,
|
|
||||||
QStackedWidget, QSpacerItem, QSizePolicy, QCheckBox, QDesktopWidget
|
|
||||||
)
|
|
||||||
from PyQt5.QtCore import Qt, QSettings, pyqtSignal
|
|
||||||
|
|
||||||
class TourStepWidget(QWidget):
    """One page of the welcome tour: a centered title above rich-text body copy."""

    def __init__(self, title_text, content_text, parent=None):
        super().__init__(parent)

        # Heading label — bold, centered, with extra space below the title.
        heading = QLabel(title_text)
        heading.setAlignment(Qt.AlignCenter)
        heading.setStyleSheet("font-size: 18px; font-weight: bold; color: #E0E0E0; padding-bottom: 15px;")

        # Body label — left-aligned rich text (HTML bullet lists), wrapped,
        # with a roomier line height for readability.
        body = QLabel(content_text)
        body.setWordWrap(True)
        body.setAlignment(Qt.AlignLeft)
        body.setTextFormat(Qt.RichText)
        body.setStyleSheet("font-size: 11pt; color: #C8C8C8; line-height: 1.8;")

        # Vertical layout: title, content, then a stretch to push both upward.
        page_layout = QVBoxLayout(self)
        page_layout.setContentsMargins(20, 20, 20, 20)
        page_layout.setSpacing(10)
        page_layout.addWidget(heading)
        page_layout.addWidget(body)
        page_layout.addStretch(1)
|
|
||||||
|
|
||||||
class TourDialog(QDialog):
    """
    A dialog that shows a multi-page tour to the user.
    Includes a "Never show again" checkbox.
    Uses QSettings to remember this preference.
    """
    # Emitted when the user reaches the last page and clicks "Finish".
    tour_finished_normally = pyqtSignal()
    # Emitted when the user clicks "Skip Tour".
    tour_skipped = pyqtSignal()

    # QSettings namespace used to persist the "never show again" preference.
    CONFIG_ORGANIZATION_NAME = "KemonoDownloader"
    CONFIG_APP_NAME_TOUR = "ApplicationTour"
    TOUR_SHOWN_KEY = "neverShowTourAgainV3" # Updated key for new tour content

    def __init__(self, parent=None):
        """Build the fixed-size, modal tour dialog and center it on screen."""
        super().__init__(parent)
        self.settings = QSettings(self.CONFIG_ORGANIZATION_NAME, self.CONFIG_APP_NAME_TOUR)
        # Index into self.tour_steps of the currently visible page.
        self.current_step = 0

        self.setWindowTitle("Welcome to Kemono Downloader!")
        self.setModal(True)
        # Set fixed square size, smaller than main window
        self.setFixedSize(600, 620) # Slightly adjusted for potentially more text
        self.setStyleSheet("""
            QDialog {
                background-color: #2E2E2E;
                border: 1px solid #5A5A5A;
            }
            QLabel {
                color: #E0E0E0;
            }
            QCheckBox {
                color: #C0C0C0;
                font-size: 10pt;
                spacing: 5px;
            }
            QCheckBox::indicator {
                width: 13px;
                height: 13px;
            }
            QPushButton {
                background-color: #555;
                color: #F0F0F0;
                border: 1px solid #6A6A6A;
                padding: 8px 15px;
                border-radius: 4px;
                min-height: 25px;
                font-size: 11pt;
            }
            QPushButton:hover {
                background-color: #656565;
            }
            QPushButton:pressed {
                background-color: #4A4A4A;
            }
        """)
        self._init_ui()
        self._center_on_screen()

    def _center_on_screen(self):
        """Centers the dialog on the screen."""
        try:
            screen_geometry = QDesktopWidget().screenGeometry()
            dialog_geometry = self.frameGeometry()
            center_point = screen_geometry.center()
            dialog_geometry.moveCenter(center_point)
            self.move(dialog_geometry.topLeft())
        except Exception as e:
            # Centering is cosmetic; never let it crash dialog creation.
            print(f"[Tour] Error centering dialog: {e}")

    def _init_ui(self):
        """Create the stacked tour pages, the checkbox, and the nav buttons."""
        main_layout = QVBoxLayout(self)
        main_layout.setContentsMargins(0, 0, 0, 0)
        main_layout.setSpacing(0)

        # Pages live in a QStackedWidget; navigation just changes the index.
        self.stacked_widget = QStackedWidget()
        main_layout.addWidget(self.stacked_widget, 1)

        # --- Define Tour Steps with Updated Content ---
        step1_content = (
            "Hello! This quick tour will walk you through the main features of the Kemono Downloader."
            "<ul>"
            "<li>Our goal is to help you easily download content from Kemono and Coomer.</li>"
            "<li>Use the <b>Next</b> and <b>Back</b> buttons to navigate.</li>"
            "<li>Click <b>Skip Tour</b> to close this guide at any time.</li>"
            "<li>Check <b>'Never show this tour again'</b> if you don't want to see this on future startups.</li>"
            "</ul>"
        )
        self.step1 = TourStepWidget("👋 Welcome!", step1_content)

        step2_content = (
            "Let's start with the basics for downloading:"
            "<ul>"
            "<li><b>🔗 Kemono Creator/Post URL:</b><br>"
            " Paste the full web address (URL) of a creator's page (e.g., <i>https://kemono.su/patreon/user/12345</i>) "
            "or a specific post (e.g., <i>.../post/98765</i>).</li><br>"
            "<li><b>📁 Download Location:</b><br>"
            " Click 'Browse...' to choose a folder on your computer where all downloaded files will be saved. "
            "This is required unless you are using 'Only Links' mode.</li><br>"
            "<li><b>📄 Page Range (Creator URLs only):</b><br>"
            " If downloading from a creator's page, you can specify a range of pages (e.g., pages 2 to 5). "
            "Leave blank for all pages. This is disabled for single post URLs or when <b>Manga/Comic Mode</b> is active.</li>"
            "</ul>"
        )
        self.step2 = TourStepWidget("① Getting Started", step2_content)

        step3_content = (
            "Refine what you download with these filters:"
            "<ul>"
            "<li><b>🎯 Filter by Character(s):</b><br>"
            " Enter character names, comma-separated (e.g., <i>Tifa, Aerith</i>). "
            " <ul><li>In <b>Normal Mode</b>, this filters individual files by matching their filenames.</li>"
            " <li>In <b>Manga/Comic Mode</b>, this filters entire posts by matching the post title. Useful for targeting specific series.</li>"
            " <li>Also helps in folder naming if 'Separate Folders' is enabled.</li></ul></li><br>"
            "<li><b>🚫 Skip with Words:</b><br>"
            " Enter words, comma-separated (e.g., <i>WIP, sketch, preview</i>). "
            " The <b>Scope</b> button (next to this input) cycles how this filter applies:"
            " <ul><li><i>Scope: Files:</i> Skips files if their names contain any of these words.</li>"
            " <li><i>Scope: Posts:</i> Skips entire posts if their titles contain any of these words.</li>"
            " <li><i>Scope: Both:</i> Applies both file and post title skipping.</li></ul></li><br>"
            "<li><b>Filter Files (Radio Buttons):</b> Choose what to download:"
            " <ul>"
            " <li><i>All:</i> Downloads all file types found.</li>"
            " <li><i>Images/GIFs:</i> Only common image formats and GIFs.</li>"
            " <li><i>Videos:</i> Only common video formats.</li>"
            " <li><b><i>📦 Only Archives:</i></b> Exclusively downloads <b>.zip</b> and <b>.rar</b> files. When selected, 'Skip .zip' and 'Skip .rar' checkboxes are automatically disabled and unchecked.</li>"
            " <li><i>🔗 Only Links:</i> Extracts and displays external links from post descriptions instead of downloading files.</li>"
            " </ul></li>"
            "</ul>"
        )
        self.step3 = TourStepWidget("② Filtering Downloads", step3_content)

        step4_content = (
            "More options to customize your downloads:"
            "<ul>"
            "<li><b>Skip .zip / Skip .rar:</b> Check these to avoid downloading these archive file types. "
            " <i>(Note: These are disabled and ignored if '📦 Only Archives' mode is selected).</i></li><br>"
            "<li><b>Download Thumbnails Only:</b> Downloads small preview images instead of full-sized files (if available).</li><br>"
            "<li><b>Compress Large Images:</b> If the 'Pillow' library is installed, images larger than 1.5MB will be converted to WebP format if the WebP version is significantly smaller.</li><br>"
            "<li><b>🗄️ Custom Folder Name (Single Post Only):</b><br>"
            " If you are downloading a single specific post URL AND 'Separate Folders by Name/Title' is enabled, "
            "you can enter a custom name here for that post's download folder.</li>"
            "</ul>"
        )
        self.step4 = TourStepWidget("③ Fine-Tuning Downloads", step4_content)

        step5_content = (
            "Organize your downloads and manage performance:"
            "<ul>"
            "<li><b>⚙️ Separate Folders by Name/Title:</b> Creates subfolders based on the 'Filter by Character(s)' input or post titles (can use the 'Known Shows/Characters' list as a fallback for folder names).</li><br>"
            "<li><b>Subfolder per Post:</b> If 'Separate Folders' is on, this creates an additional subfolder for <i>each individual post</i> inside the main character/title folder.</li><br>"
            "<li><b>🚀 Use Multithreading (Threads):</b> Enables faster downloads for creator pages by processing multiple posts or files concurrently. The number of threads can be adjusted. Single post URLs are processed using a single thread for post data but can use multiple threads for file downloads within that post.</li><br>"
            "<li><b>📖 Manga/Comic Mode (Creator URLs only):</b> Tailored for sequential content."
            " <ul>"
            " <li>Downloads posts from <b>oldest to newest</b>.</li>"
            " <li>The 'Page Range' input is disabled as all posts are fetched.</li>"
            " <li>A <b>filename style toggle button</b> (e.g., 'Name: Post Title' or 'Name: Original File') appears in the top-right of the log area when this mode is active for a creator feed. Click it to change naming:"
            " <ul>"
            " <li><b><i>Name: Post Title (Default):</i></b> The first file in a post is named after the post's title (e.g., <i>MyMangaChapter1.jpg</i>). Subsequent files in the <i>same post</i> (if any) will retain their original filenames.</li>"
            " <li><b><i>Name: Original File:</i></b> All files will attempt to keep their original filenames as provided by the site (e.g., <i>001.jpg, page_02.png</i>). You'll see a recommendation to use 'Post Title' style if you choose this.</li>"
            " </ul>"
            " </li>"
            " <li>For best results with 'Name: Post Title' style, use the 'Filter by Character(s)' field with the manga/series title.</li>"
            " </ul></li><br>"
            "<li><b>🎭 Known Shows/Characters:</b> Add names here (e.g., <i>Game Title, Series Name, Character Full Name</i>). These are used for automatic folder creation when 'Separate Folders' is on and no specific 'Filter by Character(s)' is provided for a post.</li>"
            "</ul>"
        )
        self.step5 = TourStepWidget("④ Organization & Performance", step5_content)

        step6_content = (
            "Monitoring and Controls:"
            "<ul>"
            "<li><b>📜 Progress Log / Extracted Links Log:</b> Shows detailed download messages. If '🔗 Only Links' mode is active, this area displays the extracted links.</li><br>"
            "<li><b>Show External Links in Log:</b> If checked, a secondary log panel appears below the main log to display any external links found in post descriptions. <i>(This is disabled if '🔗 Only Links' or '📦 Only Archives' mode is active).</i></li><br>"
            "<li><b>Log Verbosity (Show Basic/Full Log):</b> Toggles the main log between showing all messages (Full) or only key summaries, errors, and warnings (Basic).</li><br>"
            "<li><b>🔄 Reset:</b> Clears all input fields, logs, and resets temporary settings to their defaults. Can only be used when no download is active.</li><br>"
            "<li><b>⬇️ Start Download / ❌ Cancel:</b> These buttons initiate or stop the current download/extraction process.</li>"
            "</ul>"
            "<br>You're all set! Click <b>'Finish'</b> to close the tour and start using the downloader."
        )
        self.step6 = TourStepWidget("⑤ Logs & Final Controls", step6_content)

        # Register every page with the stacked widget, in display order.
        self.tour_steps = [self.step1, self.step2, self.step3, self.step4, self.step5, self.step6]
        for step_widget in self.tour_steps:
            self.stacked_widget.addWidget(step_widget)

        bottom_controls_layout = QVBoxLayout()
        bottom_controls_layout.setContentsMargins(15, 10, 15, 15) # Adjusted margins
        bottom_controls_layout.setSpacing(10)

        self.never_show_again_checkbox = QCheckBox("Never show this tour again")
        bottom_controls_layout.addWidget(self.never_show_again_checkbox, 0, Qt.AlignLeft)

        buttons_layout = QHBoxLayout()
        buttons_layout.setSpacing(10)

        self.skip_button = QPushButton("Skip Tour")
        self.skip_button.clicked.connect(self._skip_tour_action)

        # "Back" starts disabled because the tour opens on the first page.
        self.back_button = QPushButton("Back")
        self.back_button.clicked.connect(self._previous_step)
        self.back_button.setEnabled(False)

        self.next_button = QPushButton("Next")
        self.next_button.clicked.connect(self._next_step_action)
        self.next_button.setDefault(True)

        buttons_layout.addWidget(self.skip_button)
        buttons_layout.addStretch(1)
        buttons_layout.addWidget(self.back_button)
        buttons_layout.addWidget(self.next_button)

        bottom_controls_layout.addLayout(buttons_layout)
        main_layout.addLayout(bottom_controls_layout)

        self._update_button_states()

    def _handle_exit_actions(self):
        """Persist the "never show again" choice (only when checked) on any exit path."""
        if self.never_show_again_checkbox.isChecked():
            self.settings.setValue(self.TOUR_SHOWN_KEY, True)
            self.settings.sync()
        # else:
        #     print(f"[Tour] '{self.TOUR_SHOWN_KEY}' setting not set to True (checkbox was unchecked on exit).")

    def _next_step_action(self):
        """Advance one page; on the last page, finish the tour and accept the dialog."""
        if self.current_step < len(self.tour_steps) - 1:
            self.current_step += 1
            self.stacked_widget.setCurrentIndex(self.current_step)
        else:
            self._handle_exit_actions()
            self.tour_finished_normally.emit()
            self.accept()
        self._update_button_states()

    def _previous_step(self):
        """Go back one page (no-op on the first page)."""
        if self.current_step > 0:
            self.current_step -= 1
            self.stacked_widget.setCurrentIndex(self.current_step)
        self._update_button_states()

    def _skip_tour_action(self):
        """Abort the tour: persist preference if checked, emit tour_skipped, reject."""
        self._handle_exit_actions()
        self.tour_skipped.emit()
        self.reject()

    def _update_button_states(self):
        """Relabel "Next"/"Finish" and enable "Back" according to the current page."""
        if self.current_step == len(self.tour_steps) - 1:
            self.next_button.setText("Finish")
        else:
            self.next_button.setText("Next")
        self.back_button.setEnabled(self.current_step > 0)

    @staticmethod
    def run_tour_if_needed(parent_app_window):
        """Show the tour unless the user opted out; return the QDialog result code.

        Returns QDialog.Rejected without showing anything when the stored
        "never show again" flag is set, or when any unexpected error occurs.
        """
        try:
            settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR)
            never_show_again = settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool)

            if never_show_again:
                return QDialog.Rejected

            tour_dialog = TourDialog(parent_app_window)
            result = tour_dialog.exec_()
            return result
        except Exception as e:
            # The tour must never take down the host application.
            print(f"[Tour] CRITICAL ERROR in run_tour_if_needed: {e}")
            traceback.print_exc()
            return QDialog.Rejected
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Standalone smoke test: run the tour outside the main application.
    app = QApplication(sys.argv)

    # --- For testing: force the tour to show by resetting the flag ---
    # print("[Tour Test] Resetting 'Never show again' flag for testing purposes.")
    # test_settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR)
    # test_settings.setValue(TourDialog.TOUR_SHOWN_KEY, False) # Set to False to force tour
    # test_settings.sync()
    # --- End testing block ---

    print("[Tour Test] Running tour standalone...")
    outcome = TourDialog.run_tour_if_needed(None)

    if outcome == QDialog.Accepted:
        print("[Tour Test] Tour dialog was accepted (Finished).")
    elif outcome == QDialog.Rejected:
        print("[Tour Test] Tour dialog was rejected (Skipped or previously set to 'Never show again').")

    # Report the persisted preference so the tester can see what was saved.
    final_settings = QSettings(TourDialog.CONFIG_ORGANIZATION_NAME, TourDialog.CONFIG_APP_NAME_TOUR)
    print(f"[Tour Test] Final state of '{TourDialog.TOUR_SHOWN_KEY}' in settings: {final_settings.value(TourDialog.TOUR_SHOWN_KEY, False, type=bool)}")

    sys.exit()
|
|
||||||
Reference in New Issue
Block a user