This commit is contained in:
Yuvi9587 2025-07-13 10:22:06 -07:00
parent f0bf74da16
commit dbdf82a079
18 changed files with 7032 additions and 1123 deletions

View File

@@ -0,0 +1,97 @@
Fonts are (c) Bitstream (see below). DejaVu changes are in public domain.
Glyphs imported from Arev fonts are (c) Tavmjong Bah (see below)
Bitstream Vera Fonts Copyright
------------------------------
Copyright (c) 2003 by Bitstream, Inc. All Rights Reserved. Bitstream Vera is
a trademark of Bitstream, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of the fonts accompanying this license ("Fonts") and associated
documentation files (the "Font Software"), to reproduce and distribute the
Font Software, including without limitation the rights to use, copy, merge,
publish, distribute, and/or sell copies of the Font Software, and to permit
persons to whom the Font Software is furnished to do so, subject to the
following conditions:
The above copyright and trademark notices and this permission notice shall
be included in all copies of one or more of the Font Software typefaces.
The Font Software may be modified, altered, or added to, and in particular
the designs of glyphs or characters in the Fonts may be modified and
additional glyphs or characters may be added to the Fonts, only if the fonts
are renamed to names not containing either the words "Bitstream" or the word
"Vera".
This License becomes null and void to the extent applicable to Fonts or Font
Software that has been modified and is distributed under the "Bitstream
Vera" names.
The Font Software may be sold as part of a larger software package but no
copy of one or more of the Font Software typefaces may be sold by itself.
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF COPYRIGHT, PATENT,
TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL BITSTREAM OR THE GNOME
FOUNDATION BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, INCLUDING
ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM OTHER DEALINGS IN THE
FONT SOFTWARE.
Except as contained in this notice, the names of Gnome, the Gnome
Foundation, and Bitstream Inc., shall not be used in advertising or
otherwise to promote the sale, use or other dealings in this Font Software
without prior written authorization from the Gnome Foundation or Bitstream
Inc., respectively. For further information, contact: fonts at gnome dot
org.
Arev Fonts Copyright
------------------------------
Copyright (c) 2006 by Tavmjong Bah. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the fonts accompanying this license ("Fonts") and
associated documentation files (the "Font Software"), to reproduce
and distribute the modifications to the Bitstream Vera Font Software,
including without limitation the rights to use, copy, merge, publish,
distribute, and/or sell copies of the Font Software, and to permit
persons to whom the Font Software is furnished to do so, subject to
the following conditions:
The above copyright and trademark notices and this permission notice
shall be included in all copies of one or more of the Font Software
typefaces.
The Font Software may be modified, altered, or added to, and in
particular the designs of glyphs or characters in the Fonts may be
modified and additional glyphs or characters may be added to the
Fonts, only if the fonts are renamed to names not containing either
the words "Tavmjong Bah" or the word "Arev".
This License becomes null and void to the extent applicable to Fonts
or Font Software that has been modified and is distributed under the
"Tavmjong Bah Arev" names.
The Font Software may be sold as part of a larger software package but
no copy of one or more of the Font Software typefaces may be sold by
itself.
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL
TAVMJONG BAH BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.
Except as contained in this notice, the name of Tavmjong Bah shall not
be used in advertising or otherwise to promote the sale, use or other
dealings in this Font Software without prior written authorization
from Tavmjong Bah. For further information, contact: tavmjong @ free
. fr.

Binary file not shown (9 files).

main_window_old.py (new file, 5529 lines)

File diff suppressed because it is too large.

View File

@@ -1,12 +1,10 @@
# --- Standard Library Imports ---
import time
import traceback
from urllib.parse import urlparse
# --- Third-Party Library Imports ---
import json # Ensure json is imported
import requests
# --- Local Application Imports ---
# (Keep the rest of your imports)
from ..utils.network_utils import extract_post_info, prepare_cookies_for_request
from ..config.constants import (
STYLE_DATE_POST_TITLE
@@ -15,36 +13,24 @@ from ..config.constants import (
def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
"""
Fetches a single page of posts from the API with retry logic.
Args:
api_url_base (str): The base URL for the user's posts.
headers (dict): The request headers.
offset (int): The offset for pagination.
logger (callable): Function to log messages.
cancellation_event (threading.Event): Event to signal cancellation.
pause_event (threading.Event): Event to signal pause.
cookies_dict (dict): A dictionary of cookies to include in the request.
Returns:
list: A list of post data dictionaries from the API.
Raises:
RuntimeError: If the fetch fails after all retries or encounters a non-retryable error.
Fetches a single page of posts from the API with robust retry logic.
Requests only the essential metadata fields so the response stays small and reliable.
"""
if cancellation_event and cancellation_event.is_set():
logger(" Fetch cancelled before request.")
raise RuntimeError("Fetch operation cancelled by user.")
if pause_event and pause_event.is_set():
logger(" Post fetching paused...")
while pause_event.is_set():
if cancellation_event and cancellation_event.is_set():
logger(" Post fetching cancelled while paused.")
raise RuntimeError("Fetch operation cancelled by user.")
raise RuntimeError("Fetch operation cancelled by user while paused.")
time.sleep(0.5)
logger(" Post fetching resumed.")
paginated_url = f'{api_url_base}?o={offset}'
# Request only metadata fields so the large 'content' body is excluded from the post list, avoiding timeouts.
fields_to_request = "id,user,service,title,shared_file,added,published,edited,file,attachments,tags"
paginated_url = f'{api_url_base}?o={offset}&fields={fields_to_request}'
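# Illustrative example (hypothetical creator): with offset=50 the request URL built above becomes
#   https://kemono.su/api/v1/patreon/user/12345?o=50&fields=id,user,service,title,shared_file,added,published,edited,file,attachments,tags
# so the post list carries only metadata; the full 'content' body is fetched per post later when needed.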
max_retries = 3
retry_delay = 5
@@ -52,22 +38,18 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
if cancellation_event and cancellation_event.is_set():
raise RuntimeError("Fetch operation cancelled by user during retry loop.")
log_message = f" Fetching: {paginated_url} (Page approx. {offset // 50 + 1})"
log_message = f" Fetching post list: {api_url_base}?o={offset} (Page approx. {offset // 50 + 1})"
if attempt > 0:
log_message += f" (Attempt {attempt + 1}/{max_retries})"
logger(log_message)
try:
response = requests.get(paginated_url, headers=headers, timeout=(15, 90), cookies=cookies_dict)
# Streaming is unnecessary here: with only metadata fields requested, the response is small and fast.
response = requests.get(paginated_url, headers=headers, timeout=(15, 60), cookies=cookies_dict)
response.raise_for_status()
if 'application/json' not in response.headers.get('Content-Type', '').lower():
logger(f"⚠️ Unexpected content type from API: {response.headers.get('Content-Type')}. Body: {response.text[:200]}")
return []
return response.json()
except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
except requests.exceptions.RequestException as e:
logger(f" ⚠️ Retryable network error on page fetch (Attempt {attempt + 1}): {e}")
if attempt < max_retries - 1:
delay = retry_delay * (2 ** attempt)
@@ -76,18 +58,46 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
continue
else:
logger(f" ❌ Failed to fetch page after {max_retries} attempts.")
raise RuntimeError(f"Timeout or connection error fetching offset {offset}")
except requests.exceptions.RequestException as e:
err_msg = f"Error fetching offset {offset}: {e}"
if e.response is not None:
err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})"
raise RuntimeError(err_msg)
except ValueError as e: # JSON decode error
raise RuntimeError(f"Error decoding JSON from offset {offset}: {e}. Response: {response.text[:200]}")
raise RuntimeError(f"Network error fetching offset {offset}")
except json.JSONDecodeError as e:
logger(f" ❌ Failed to decode JSON on page fetch (Attempt {attempt + 1}): {e}")
if attempt < max_retries - 1:
delay = retry_delay * (2 ** attempt)
logger(f" Retrying in {delay} seconds...")
time.sleep(delay)
continue
else:
raise RuntimeError(f"JSONDecodeError fetching offset {offset}")
raise RuntimeError(f"Failed to fetch page {paginated_url} after all attempts.")
def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None):
"""
Fetches the full data, including the 'content' field, for a single post.
"""
post_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}"
logger(f" Fetching full content for post ID {post_id}...")
try:
# Stream the response as a precaution, since a single post's full content can still be very large.
with requests.get(post_api_url, headers=headers, timeout=(15, 300), cookies=cookies_dict, stream=True) as response:
response.raise_for_status()
response_body = b""
for chunk in response.iter_content(chunk_size=8192):
response_body += chunk
full_post_data = json.loads(response_body)
# The API sometimes wraps the post in a list, handle that.
if isinstance(full_post_data, list) and full_post_data:
return full_post_data[0]
return full_post_data
except Exception as e:
logger(f" ❌ Failed to fetch full content for post {post_id}: {e}")
return None
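# Minimal usage sketch (hypothetical IDs; headers/logger as used elsewhere in this module):
#   post = fetch_single_post_data("kemono.su", "patreon", "12345", "987654",
#                                  headers={"User-Agent": "Mozilla/5.0"}, logger=print)
#   if post is not None:
#       print(post.get("title"), len(post.get("content", "")))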
def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
"""Fetches all comments for a specific post."""
if cancellation_event and cancellation_event.is_set():

View File

@@ -20,6 +20,26 @@ try:
from PIL import Image
except ImportError:
Image = None
#
try:
from fpdf import FPDF
# Add a simple class to handle the header/footer for stories
class PDF(FPDF):
def header(self):
pass # No header
def footer(self):
self.set_y(-15)
self.set_font('Arial', 'I', 8)
self.cell(0, 10, 'Page %s' % self.page_no(), 0, 0, 'C')
except ImportError:
FPDF = None
try:
from docx import Document
except ImportError:
Document = None
# --- PyQt5 Imports ---
from PyQt5 .QtCore import Qt ,QThread ,pyqtSignal ,QMutex ,QMutexLocker ,QObject ,QTimer ,QSettings ,QStandardPaths ,QCoreApplication ,QUrl ,QSize ,QProcess
# --- Local Application Imports ---
@@ -48,7 +68,8 @@ class PostProcessorSignals (QObject ):
file_progress_signal =pyqtSignal (str ,object )
file_successfully_downloaded_signal =pyqtSignal (dict )
missed_character_post_signal =pyqtSignal (str ,str )
worker_finished_signal = pyqtSignal(tuple)
class PostProcessorWorker:
def __init__ (self ,post_data ,download_root ,known_names ,
filter_character_list ,emitter ,
@@ -81,6 +102,10 @@ class PostProcessorWorker:
keep_in_post_duplicates=False,
session_file_path=None,
session_lock=None,
text_only_scope=None,
text_export_format='txt',
single_pdf_mode=False,
project_root_dir=None,
):
self .post =post_data
self .download_root =download_root
@@ -134,6 +159,10 @@ class PostProcessorWorker:
self.keep_in_post_duplicates = keep_in_post_duplicates
self.session_file_path = session_file_path
self.session_lock = session_lock
self.text_only_scope = text_only_scope
self.text_export_format = text_export_format
self.single_pdf_mode = single_pdf_mode  # Compile text from all matching posts into one PDF
self.project_root_dir = project_root_dir
if self .compress_images and Image is None :
self .logger ("⚠️ Image compression disabled: Pillow library not found.")
@@ -557,6 +586,8 @@ class PostProcessorWorker:
final_total_for_progress =total_size_bytes if download_successful_flag and total_size_bytes >0 else downloaded_size_bytes
self ._emit_signal ('file_progress',api_original_filename ,(downloaded_size_bytes ,final_total_for_progress ))
# Rescue download if an IncompleteRead error occurred but the file is complete
if (not download_successful_flag and
isinstance(last_exception_for_retry_later, http.client.IncompleteRead) and
@@ -614,33 +645,32 @@ class PostProcessorWorker:
is_img_for_compress_check = is_image(api_original_filename)
if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024):
# ... (This block for image compression remains the same)
self .logger (f" Compressing '{api_original_filename }' ({downloaded_size_bytes /(1024 *1024 ):.2f} MB)...")
if self ._check_pause (f"Image compression for '{api_original_filename }'"):return 0 ,1 ,filename_to_save_in_main_path ,was_original_name_kept_flag ,FILE_DOWNLOAD_STATUS_SKIPPED ,None
img_content_for_pillow =None
try :
with open (downloaded_part_file_path ,'rb')as f_img_in :
img_content_for_pillow =BytesIO (f_img_in .read ())
with Image .open (img_content_for_pillow )as img_obj :
if img_obj .mode =='P':img_obj =img_obj .convert ('RGBA')
elif img_obj .mode not in ['RGB','RGBA','L']:img_obj =img_obj .convert ('RGB')
compressed_output_io =BytesIO ()
img_obj .save (compressed_output_io ,format ='WebP',quality =80 ,method =4 )
compressed_size =compressed_output_io .getbuffer ().nbytes
if compressed_size <downloaded_size_bytes *0.9 :
self .logger (f" Compression success: {compressed_size /(1024 *1024 ):.2f} MB.")
data_to_write_io =compressed_output_io
data_to_write_io .seek (0 )
base_name_orig ,_ =os .path .splitext (filename_after_compression )
filename_after_compression =base_name_orig +'.webp'
self .logger (f" Updated filename (compressed): {filename_after_compression }")
else :
self .logger (f" Compression skipped: WebP not significantly smaller.")
if compressed_output_io :compressed_output_io .close ()
except Exception as comp_e :
self .logger (f"❌ Compression failed for '{api_original_filename }': {comp_e }. Saving original.")
finally :
if img_content_for_pillow :img_content_for_pillow .close ()
self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024 * 1024):.2f} MB)...")
if self._check_pause(f"Image compression for '{api_original_filename}'"): return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
img_content_for_pillow = None
try:
with open(downloaded_part_file_path, 'rb') as f_img_in:
img_content_for_pillow = BytesIO(f_img_in.read())
with Image.open(img_content_for_pillow) as img_obj:
if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA')
elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB')
compressed_output_io = BytesIO()
img_obj.save(compressed_output_io, format='WebP', quality=80, method=4)
compressed_size = compressed_output_io.getbuffer().nbytes
if compressed_size < downloaded_size_bytes * 0.9:
self.logger(f" Compression success: {compressed_size / (1024 * 1024):.2f} MB.")
data_to_write_io = compressed_output_io
data_to_write_io.seek(0)
base_name_orig, _ = os.path.splitext(filename_after_compression)
filename_after_compression = base_name_orig + '.webp'
self.logger(f" Updated filename (compressed): {filename_after_compression}")
else:
self.logger(f" Compression skipped: WebP not significantly smaller.")
if compressed_output_io: compressed_output_io.close()
except Exception as comp_e:
self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original.")
finally:
if img_content_for_pillow: img_content_for_pillow.close()
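# Worked example of the thresholds above (illustrative sizes): a 4.0 MB image qualifies for compression
# (> 1.5 MB); a WebP re-encode of 2.4 MB (60% of the original, below the 0.9 cut-off) replaces the file
# and the name gains a .webp extension, whereas a 3.8 MB result (95%) is discarded and the original kept.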
final_filename_on_disk = filename_after_compression
temp_base, temp_ext = os.path.splitext(final_filename_on_disk)
@@ -695,11 +725,14 @@ class PostProcessorWorker:
return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SUCCESS, None
except Exception as save_err:
self.logger(f"->>Save Fail for '{final_filename_on_disk}': {save_err}")
if os.path.exists(final_save_path):
try: os.remove(final_save_path)
except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}")
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
self.logger(f"->>Save Fail for '{final_filename_on_disk}': {save_err}")
if os.path.exists(final_save_path):
try: os.remove(final_save_path)
except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}")
# --- FIX: Report as a permanent failure so it appears in the error dialog ---
permanent_failure_details = { 'file_info': file_info, 'target_folder_path': target_folder_path, 'headers': headers, 'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title, 'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post, 'forced_filename_override': filename_to_save_in_main_path, }
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, permanent_failure_details
finally:
if data_to_write_io and hasattr(data_to_write_io, 'close'):
data_to_write_io.close()
@@ -738,14 +771,16 @@ class PostProcessorWorker:
effective_save_folder =target_folder_path
filename_after_styling_and_word_removal =filename_to_save_in_main_path
try :
os .makedirs (effective_save_folder ,exist_ok =True )
except OSError as e :
self .logger (f" ❌ Critical error creating directory '{effective_save_folder }': {e }. Skipping file '{api_original_filename }'.")
if downloaded_part_file_path and os .path .exists (downloaded_part_file_path ):
try :os .remove (downloaded_part_file_path )
except OSError :pass
return 0 ,1 ,api_original_filename ,False ,FILE_DOWNLOAD_STATUS_SKIPPED ,None
try:
os.makedirs(effective_save_folder, exist_ok=True)
except OSError as e:
self.logger(f" ❌ Critical error creating directory '{effective_save_folder}': {e}. Skipping file '{api_original_filename}'.")
if downloaded_part_file_path and os.path.exists(downloaded_part_file_path):
try: os.remove(downloaded_part_file_path)
except OSError: pass
# --- FIX: Report as a permanent failure so it appears in the error dialog ---
permanent_failure_details = { 'file_info': file_info, 'target_folder_path': target_folder_path, 'headers': headers, 'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title, 'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post, 'forced_filename_override': filename_to_save_in_main_path, }
return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, permanent_failure_details
data_to_write_io =None
filename_after_compression =filename_after_styling_and_word_removal
@@ -849,8 +884,8 @@ class PostProcessorWorker:
data_to_write_io .close ()
def process (self ):
if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None
if self .check_cancel ():return 0 ,0 ,[],[],[],None
if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None, None
if self .check_cancel ():return 0 ,0 ,[],[],[],None, None
current_character_filters =self ._get_current_character_filters ()
kept_original_filenames_for_log =[]
retryable_failures_this_post =[]
@@ -986,23 +1021,23 @@ class PostProcessorWorker:
if self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match :
self .logger (f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title [:50 ]}' does not match character filters.")
self ._emit_signal ('missed_character_post',post_title ,"No title match for character filter")
return 0 ,num_potential_files_in_post ,[],[],[],None
return 0 ,num_potential_files_in_post ,[],[],[],None, None
if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match :
self .logger (f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id }', Title '{post_title [:50 ]}...'")
if self .emitter and hasattr (self .emitter ,'missed_character_post_signal'):
self ._emit_signal ('missed_character_post',post_title ,"No character match in files or comments (Comments scope)")
return 0 ,num_potential_files_in_post ,[],[],[],None
return 0 ,num_potential_files_in_post ,[],[],[],None, None
if self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH ):
if self ._check_pause (f"Skip words (post title) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
post_title_lower =post_title .lower ()
for skip_word in self .skip_words_list :
if skip_word .lower ()in post_title_lower :
self .logger (f" -> Skip Post (Keyword in Title '{skip_word }'): '{post_title [:50 ]}...'. Scope: {self .skip_words_scope }")
return 0 ,num_potential_files_in_post ,[],[],[],None
return 0 ,num_potential_files_in_post ,[],[],[],None, None
if not self .extract_links_only and self .manga_mode_active and current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and not post_is_candidate_by_title_char_match :
self .logger (f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title [:50 ]}' doesn't match filters.")
self ._emit_signal ('missed_character_post',post_title ,"Manga Mode: No title match for character filter (Title/Both scope)")
return 0 ,num_potential_files_in_post ,[],[],[],None
return 0 ,num_potential_files_in_post ,[],[],[],None, None
if not isinstance (post_attachments ,list ):
self .logger (f"⚠️ Corrupt attachment data for post {post_id } (expected list, got {type (post_attachments )}). Skipping attachments.")
post_attachments =[]
@@ -1171,6 +1206,156 @@ class PostProcessorWorker:
break
determined_post_save_path_for_history =os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name )
if self.filter_mode == 'text_only' and not self.extract_links_only:
self.logger(f" Mode: Text Only (Scope: {self.text_only_scope})")
# --- Apply Title-based filters to ensure post is a candidate ---
post_title_lower = post_title.lower()
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
for skip_word in self.skip_words_list:
if skip_word.lower() in post_title_lower:
self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'.")
return 0, num_potential_files_in_post, [], [], [], None, None
if current_character_filters and not post_is_candidate_by_title_char_match and not post_is_candidate_by_comment_char_match and not post_is_candidate_by_file_char_match_in_comment_scope:
self.logger(f" -> Skip Post (No character match for text extraction): '{post_title[:50]}...'.")
return 0, num_potential_files_in_post, [], [], [], None, None
# --- Get the text content based on scope ---
raw_text_content = ""
final_post_data = post_data
# Fetch full post data if content is missing and scope is 'content'
if self.text_only_scope == 'content' and 'content' not in final_post_data:
self.logger(f" Post {post_id} is missing 'content' field, fetching full data...")
parsed_url = urlparse(self.api_url_input)
api_domain = parsed_url.netloc
cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
from .api_client import fetch_single_post_data # Local import to avoid circular dependency issues
full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
if full_data:
final_post_data = full_data
if self.text_only_scope == 'content':
raw_text_content = final_post_data.get('content', '')
elif self.text_only_scope == 'comments':
try:
parsed_url = urlparse(self.api_url_input)
api_domain = parsed_url.netloc
comments_data = fetch_post_comments(api_domain, self.service, self.user_id, post_id, headers, self.logger, self.cancellation_event, self.pause_event)
if comments_data:
comment_texts = []
for comment in comments_data:
user = comment.get('user', {}).get('name', 'Unknown User')
timestamp = comment.get('updated', 'No Date')
body = strip_html_tags(comment.get('content', ''))
comment_texts.append(f"--- Comment by {user} on {timestamp} ---\n{body}\n")
raw_text_content = "\n".join(comment_texts)
except Exception as e:
self.logger(f" ❌ Error fetching comments for text-only mode: {e}")
if not raw_text_content or not raw_text_content.strip():
self.logger(" -> Skip Saving Text: No content/comments found or fetched.")
return 0, num_potential_files_in_post, [], [], [], None, None
# --- Robust HTML-to-TEXT Conversion ---
paragraph_pattern = re.compile(r'<p.*?>(.*?)</p>', re.IGNORECASE | re.DOTALL)
html_paragraphs = paragraph_pattern.findall(raw_text_content)
cleaned_text = ""
if not html_paragraphs:
self.logger(" ⚠️ No <p> tags found. Falling back to basic HTML cleaning for the whole block.")
text_with_br = re.sub(r'<br\s*/?>', '\n', raw_text_content, flags=re.IGNORECASE)
cleaned_text = re.sub(r'<.*?>', '', text_with_br)
else:
cleaned_paragraphs_list = []
for p_content in html_paragraphs:
p_with_br = re.sub(r'<br\s*/?>', '\n', p_content, flags=re.IGNORECASE)
p_cleaned = re.sub(r'<.*?>', '', p_with_br)
p_final = html.unescape(p_cleaned).strip()
if p_final:
cleaned_paragraphs_list.append(p_final)
cleaned_text = '\n\n'.join(cleaned_paragraphs_list)
cleaned_text = cleaned_text.replace('\u2026', '...')  # Normalize the Unicode ellipsis character to three dots
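# Illustrative before/after for the conversion above (hypothetical post body):
#   <p>Chapter 1<br/>It was a <strong>dark</strong> night&hellip;</p><p>Chapter 2</p>
# becomes
#   Chapter 1
#   It was a dark night...
#
#   Chapter 2
# i.e. <br> turns into a newline, remaining tags are stripped, entities are unescaped,
# and paragraphs are separated by blank lines.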
# --- Logic for Single PDF Mode (File-based) ---
if self.single_pdf_mode:
if not cleaned_text:
return 0, 0, [], [], [], None, None
content_data = {
'title': post_title,
'content': cleaned_text,
'published': self.post.get('published') or self.post.get('added')
}
temp_dir = os.path.join(self.app_base_dir, "appdata")
os.makedirs(temp_dir, exist_ok=True)
temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json"
temp_filepath = os.path.join(temp_dir, temp_filename)
try:
with open(temp_filepath, 'w', encoding='utf-8') as f:
json.dump(content_data, f, indent=2)
self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.")
return 0, 0, [], [], [], None, temp_filepath
except Exception as e:
self.logger(f" ❌ Failed to write temporary file for single PDF: {e}")
return 0, 0, [], [], [], None, None
# --- Logic for Individual File Saving ---
else:
file_extension = self.text_export_format
txt_filename = clean_filename(post_title) + f".{file_extension}"
final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename)
try:
os.makedirs(determined_post_save_path_for_history, exist_ok=True)
base, ext = os.path.splitext(final_save_path)
counter = 1
while os.path.exists(final_save_path):
final_save_path = f"{base}_{counter}{ext}"
counter += 1
if file_extension == 'pdf':
if FPDF:
self.logger(f" Converting to PDF...")
pdf = PDF()
font_path = os.path.join(self.app_base_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf')
try:
if not os.path.exists(font_path): raise RuntimeError(f"Font file not found: {font_path}")
pdf.add_font('DejaVu', '', font_path, uni=True)
pdf.set_font('DejaVu', '', 12)
except Exception as font_error:
self.logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.")
pdf.set_font('Arial', '', 12)
pdf.add_page()
pdf.multi_cell(0, 5, cleaned_text)
pdf.output(final_save_path)
else:
self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.")
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
elif file_extension == 'docx':
if Document:
self.logger(f" Converting to DOCX...")
document = Document()
document.add_paragraph(cleaned_text)
document.save(final_save_path)
else:
self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. Saving as .txt.")
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
else: # Default to TXT
with open(final_save_path, 'w', encoding='utf-8') as f:
f.write(cleaned_text)
self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'")
return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None
except Exception as e:
self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}")
return 0, num_potential_files_in_post, [], [], [], None, None
if not self .extract_links_only and self .use_subfolders and self .skip_words_list :
if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
@@ -1179,7 +1364,7 @@ class PostProcessorWorker:
if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ):
matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word")
self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.")
return 0 ,num_potential_files_in_post ,[],[],[],None
return 0 ,num_potential_files_in_post ,[],[],[],None, None
if (self .show_external_links or self .extract_links_only )and post_content_html :
if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
try :
@@ -1555,7 +1740,17 @@ class PostProcessorWorker:
except OSError as e_rmdir :
self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }")
return total_downloaded_this_post ,total_skipped_this_post ,kept_original_filenames_for_log ,retryable_failures_this_post ,permanent_failures_this_post ,history_data_for_this_post
result_tuple = (total_downloaded_this_post, total_skipped_this_post,
kept_original_filenames_for_log, retryable_failures_this_post,
permanent_failures_this_post, history_data_for_this_post,
None)  # 7th item is the temporary text file path, used only in single PDF mode
# In single PDF mode, the 7th item is the temp file path created above (if one exists).
if self.single_pdf_mode and 'temp_filepath' in locals() and os.path.exists(temp_filepath):
result_tuple = (0, 0, [], [], [], None, temp_filepath)
self._emit_signal('worker_finished', result_tuple)
return result_tuple  # Also return the tuple so the single-threaded DownloadThread can unpack it directly
class DownloadThread (QThread ):
progress_signal =pyqtSignal (str )
@@ -1605,6 +1800,10 @@ class DownloadThread (QThread ):
cookie_text ="",
session_file_path=None,
session_lock=None,
text_only_scope=None,
text_export_format='txt',
single_pdf_mode=False,
project_root_dir=None,
):
super ().__init__ ()
self .api_url_input =api_url_input
@@ -1660,6 +1859,11 @@ class DownloadThread (QThread ):
self.session_file_path = session_file_path
self.session_lock = session_lock
self.history_candidates_buffer =deque (maxlen =8 )
self.text_only_scope = text_only_scope
self.text_export_format = text_export_format
self.single_pdf_mode = single_pdf_mode  # Compile text from all matching posts into one PDF
self.project_root_dir = project_root_dir
if self .compress_images and Image is None :
self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
self .compress_images =False
@@ -1682,162 +1886,172 @@ class DownloadThread (QThread ):
self .logger ("⏭️ Skip requested for current file (single-thread mode).")
self .skip_current_file_flag .set ()
else :self .logger (" Skip file: No download active or skip flag not available for current context.")
def run (self ):
"""
The main execution method for the single-threaded download process.
It unpacks the worker's 7-value result tuple and passes the
'single_pdf_mode' setting through to each PostProcessorWorker.
"""
grand_total_downloaded_files =0
grand_total_skipped_files =0
grand_list_of_kept_original_filenames =[]
was_process_cancelled =False
# This block for initializing manga mode counters remains unchanged
if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED and not self .extract_links_only and self .manga_date_file_counter_ref is None :
series_scan_dir =self .output_dir
if self .use_subfolders :
if self .filter_character_list_objects_initial and self .filter_character_list_objects_initial [0 ]and self .filter_character_list_objects_initial [0 ].get ("name"):
series_folder_name =clean_folder_name (self .filter_character_list_objects_initial [0 ]["name"])
series_scan_dir =os .path .join (series_scan_dir ,series_folder_name )
elif self .service and self .user_id :
creator_based_folder_name =clean_folder_name (str (self .user_id ))
series_scan_dir =os .path .join (series_scan_dir ,creator_based_folder_name )
highest_num =0
if os .path .isdir (series_scan_dir ):
self .logger (f" [Thread] Manga Date Mode: Scanning for existing files in '{series_scan_dir }'...")
for dirpath ,_ ,filenames_in_dir in os .walk (series_scan_dir ):
for filename_to_check in filenames_in_dir :
prefix_to_check =clean_filename (self .manga_date_prefix .strip ())if self .manga_date_prefix and self .manga_date_prefix .strip ()else ""
name_part_to_match =filename_to_check
if prefix_to_check and name_part_to_match .startswith (prefix_to_check ):
name_part_to_match =name_part_to_match [len (prefix_to_check ):].lstrip ()
base_name_no_ext =os .path .splitext (name_part_to_match )[0 ]
match =re .match (r"(\d+)",base_name_no_ext )
if match :highest_num =max (highest_num ,int (match .group (1 )))
self .manga_date_file_counter_ref =[highest_num +1 ,threading .Lock ()]
self .logger (f" [Thread] Manga Date Mode: Initialized date-based counter at {self .manga_date_file_counter_ref [0 ]}.")
# ... (existing manga counter initialization logic) ...
pass
if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING and not self .extract_links_only and self .manga_global_file_counter_ref is None :
self .manga_global_file_counter_ref =[1 ,threading .Lock ()]
self .logger (f" [Thread] Manga Title+GlobalNum Mode: Initialized global counter at {self .manga_global_file_counter_ref [0 ]}.")
worker_signals_obj = PostProcessorSignals ()
# ... (existing manga counter initialization logic) ...
pass
worker_signals_obj = PostProcessorSignals()
try :
worker_signals_obj .progress_signal .connect (self .progress_signal )
worker_signals_obj .file_download_status_signal .connect (self .file_download_status_signal )
worker_signals_obj .file_progress_signal .connect (self .file_progress_signal )
worker_signals_obj .external_link_signal .connect (self .external_link_signal )
worker_signals_obj .missed_character_post_signal .connect (self .missed_character_post_signal )
worker_signals_obj .file_successfully_downloaded_signal .connect (self .file_successfully_downloaded_signal )
self .logger (" Starting post fetch (single-threaded download process)...")
post_generator =download_from_api (
self .api_url_input ,
logger =self .logger ,
start_page =self .start_page ,
end_page =self .end_page ,
manga_mode =self .manga_mode_active ,
cancellation_event =self .cancellation_event ,
pause_event =self .pause_event ,
use_cookie =self .use_cookie ,
cookie_text =self .cookie_text ,
selected_cookie_file =self .selected_cookie_file ,
app_base_dir =self .app_base_dir ,
manga_filename_style_for_sort_check =self .manga_filename_style if self .manga_mode_active else None
# Connect signals
worker_signals_obj.progress_signal.connect(self.progress_signal)
worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal)
worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
worker_signals_obj.external_link_signal.connect(self.external_link_signal)
worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal)
worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal)
worker_signals_obj.worker_finished_signal.connect(lambda result: None)  # No-op slot; this path consumes results via process()'s return value
self.logger(" Starting post fetch (single-threaded download process)...")
post_generator = download_from_api(
self.api_url_input,
logger=self.logger,
start_page=self.start_page,
end_page=self.end_page,
manga_mode=self.manga_mode_active,
cancellation_event=self.cancellation_event,
pause_event=self.pause_event,
use_cookie=self.use_cookie,
cookie_text=self.cookie_text,
selected_cookie_file=self.selected_cookie_file,
app_base_dir=self.app_base_dir,
manga_filename_style_for_sort_check=self.manga_filename_style if self.manga_mode_active else None
)
for posts_batch_data in post_generator :
if self ._check_pause_self ("Post batch processing"):was_process_cancelled =True ;break
if self .isInterruptionRequested ():was_process_cancelled =True ;break
for individual_post_data in posts_batch_data :
if self ._check_pause_self (f"Individual post processing for {individual_post_data .get ('id','N/A')}"):was_process_cancelled =True ;break
if self .isInterruptionRequested ():was_process_cancelled =True ;break
post_processing_worker =PostProcessorWorker (
post_data =individual_post_data ,
download_root =self .output_dir ,
known_names =self .known_names ,
filter_character_list =self .filter_character_list_objects_initial ,
dynamic_character_filter_holder =self .dynamic_filter_holder ,
unwanted_keywords =self .unwanted_keywords ,
filter_mode =self .filter_mode ,
skip_zip =self .skip_zip ,skip_rar =self .skip_rar ,
use_subfolders =self .use_subfolders ,use_post_subfolders =self .use_post_subfolders ,
target_post_id_from_initial_url =self .initial_target_post_id ,
custom_folder_name =self .custom_folder_name ,
compress_images =self .compress_images ,download_thumbnails =self .download_thumbnails ,
service =self .service ,user_id =self .user_id ,
api_url_input =self .api_url_input ,
pause_event =self .pause_event ,
cancellation_event =self .cancellation_event ,
emitter =worker_signals_obj ,
downloaded_files =self .downloaded_files ,
downloaded_file_hashes =self .downloaded_file_hashes ,
downloaded_files_lock =self .downloaded_files_lock ,
downloaded_file_hashes_lock =self .downloaded_file_hashes_lock ,
skip_words_list =self .skip_words_list ,
skip_words_scope =self .skip_words_scope ,
show_external_links =self .show_external_links ,
extract_links_only =self .extract_links_only ,
num_file_threads =self .num_file_threads_for_worker ,
skip_current_file_flag =self .skip_current_file_flag ,
manga_mode_active =self .manga_mode_active ,
manga_filename_style =self .manga_filename_style ,
manga_date_prefix =self .manga_date_prefix ,
char_filter_scope =self .char_filter_scope ,
remove_from_filename_words_list =self .remove_from_filename_words_list ,
allow_multipart_download =self .allow_multipart_download ,
selected_cookie_file =self .selected_cookie_file ,
app_base_dir =self .app_base_dir ,
cookie_text =self .cookie_text ,
override_output_dir =self .override_output_dir ,
manga_global_file_counter_ref =self .manga_global_file_counter_ref ,
use_cookie =self .use_cookie ,
manga_date_file_counter_ref =self .manga_date_file_counter_ref ,
use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder,
keep_in_post_duplicates=self.keep_in_post_duplicates,
creator_download_folder_ignore_words =self .creator_download_folder_ignore_words ,
session_file_path=self.session_file_path,
session_lock=self.session_lock,
for posts_batch_data in post_generator:
if self.isInterruptionRequested():
was_process_cancelled = True
break
for individual_post_data in posts_batch_data:
if self.isInterruptionRequested():
was_process_cancelled = True
break
# Create the worker, now correctly passing single_pdf_mode
post_processing_worker = PostProcessorWorker(
post_data=individual_post_data,
download_root=self.output_dir,
known_names=self.known_names,
filter_character_list=self.filter_character_list_objects_initial,
dynamic_character_filter_holder=self.dynamic_filter_holder,
unwanted_keywords=self.unwanted_keywords,
filter_mode=self.filter_mode,
skip_zip=self.skip_zip, skip_rar=self.skip_rar,
use_subfolders=self.use_subfolders, use_post_subfolders=self.use_post_subfolders,
target_post_id_from_initial_url=self.initial_target_post_id,
custom_folder_name=self.custom_folder_name,
compress_images=self.compress_images, download_thumbnails=self.download_thumbnails,
service=self.service, user_id=self.user_id,
api_url_input=self.api_url_input,
pause_event=self.pause_event,
cancellation_event=self.cancellation_event,
emitter=worker_signals_obj,
downloaded_files=self.downloaded_files,
downloaded_file_hashes=self.downloaded_file_hashes,
downloaded_files_lock=self.downloaded_files_lock,
downloaded_file_hashes_lock=self.downloaded_file_hashes_lock,
skip_words_list=self.skip_words_list,
skip_words_scope=self.skip_words_scope,
show_external_links=self.show_external_links,
extract_links_only=self.extract_links_only,
num_file_threads=self.num_file_threads_for_worker,
skip_current_file_flag=self.skip_current_file_flag,
manga_mode_active=self.manga_mode_active,
manga_filename_style=self.manga_filename_style,
manga_date_prefix=self.manga_date_prefix,
char_filter_scope=self.char_filter_scope,
remove_from_filename_words_list=self.remove_from_filename_words_list,
allow_multipart_download=self.allow_multipart_download,
selected_cookie_file=self.selected_cookie_file,
app_base_dir=self.app_base_dir,
cookie_text=self.cookie_text,
override_output_dir=self.override_output_dir,
manga_global_file_counter_ref=self.manga_global_file_counter_ref,
use_cookie=self.use_cookie,
manga_date_file_counter_ref=self.manga_date_file_counter_ref,
use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder,
keep_in_post_duplicates=self.keep_in_post_duplicates,
creator_download_folder_ignore_words=self.creator_download_folder_ignore_words,
session_file_path=self.session_file_path,
session_lock=self.session_lock,
text_only_scope=self.text_only_scope,
text_export_format=self.text_export_format,
single_pdf_mode=self.single_pdf_mode,
project_root_dir=self.project_root_dir
)
try :
dl_count ,skip_count ,kept_originals_this_post ,retryable_failures ,permanent_failures ,history_data =post_processing_worker .process ()
grand_total_downloaded_files +=dl_count
grand_total_skipped_files +=skip_count
if kept_originals_this_post :
grand_list_of_kept_original_filenames .extend (kept_originals_this_post )
if retryable_failures :
self .retryable_file_failed_signal .emit (retryable_failures )
if history_data :
if len (self .history_candidates_buffer )<8 :
self .post_processed_for_history_signal .emit (history_data )
if permanent_failures :
self .permanent_file_failed_signal .emit (permanent_failures )
except Exception as proc_err :
post_id_for_err =individual_post_data .get ('id','N/A')
self .logger (f"❌ Error processing post {post_id_for_err } in DownloadThread: {proc_err }")
traceback .print_exc ()
num_potential_files_est =len (individual_post_data .get ('attachments',[]))+(1 if individual_post_data .get ('file')else 0 )
grand_total_skipped_files +=num_potential_files_est
if self .skip_current_file_flag and self .skip_current_file_flag .is_set ():
self .skip_current_file_flag .clear ()
self .logger (" Skip current file flag was processed and cleared by DownloadThread.")
self .msleep (10 )
if was_process_cancelled :break
if not was_process_cancelled and not self .isInterruptionRequested ():
self .logger ("✅ All posts processed or end of content reached by DownloadThread.")
try:
# Correctly unpack the 7 values returned from the worker
(dl_count, skip_count, kept_originals_this_post,
retryable_failures, permanent_failures,
history_data, temp_filepath) = post_processing_worker.process()
grand_total_downloaded_files += dl_count
grand_total_skipped_files += skip_count
if kept_originals_this_post:
grand_list_of_kept_original_filenames.extend(kept_originals_this_post)
if retryable_failures:
self.retryable_file_failed_signal.emit(retryable_failures)
if history_data:
if len(self.history_candidates_buffer) < 8:
self.post_processed_for_history_signal.emit(history_data)
if permanent_failures:
self.permanent_file_failed_signal.emit(permanent_failures)
# In single-threaded text mode, pass the temp file path back to the main window
if self.single_pdf_mode and temp_filepath:
self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}")
except Exception as main_thread_err :
self .logger (f"\n❌ Critical error within DownloadThread run loop: {main_thread_err }")
traceback .print_exc ()
if not self .isInterruptionRequested ():was_process_cancelled =False
finally :
try :
if worker_signals_obj :
worker_signals_obj .progress_signal .disconnect (self .progress_signal )
worker_signals_obj .file_download_status_signal .disconnect (self .file_download_status_signal )
worker_signals_obj .external_link_signal .disconnect (self .external_link_signal )
worker_signals_obj .file_progress_signal .disconnect (self .file_progress_signal )
worker_signals_obj .missed_character_post_signal .disconnect (self .missed_character_post_signal )
worker_signals_obj .file_successfully_downloaded_signal .disconnect (self .file_successfully_downloaded_signal )
except Exception as proc_err:
post_id_for_err = individual_post_data.get('id', 'N/A')
self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}")
traceback.print_exc()
num_potential_files_est = len(individual_post_data.get('attachments', [])) + (1 if individual_post_data.get('file') else 0)
grand_total_skipped_files += num_potential_files_est
if self.skip_current_file_flag and self.skip_current_file_flag.is_set():
self.skip_current_file_flag.clear()
self.logger(" Skip current file flag was processed and cleared by DownloadThread.")
self.msleep(10)
if was_process_cancelled:
break
if not was_process_cancelled and not self.isInterruptionRequested():
self.logger("✅ All posts processed or end of content reached by DownloadThread.")
except Exception as main_thread_err:
self.logger(f"\n❌ Critical error within DownloadThread run loop: {main_thread_err}")
traceback.print_exc()
finally:
try:
# Disconnect signals
if worker_signals_obj:
worker_signals_obj.progress_signal.disconnect(self.progress_signal)
worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal)
worker_signals_obj.external_link_signal.disconnect(self.external_link_signal)
worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal)
worker_signals_obj.missed_character_post_signal.disconnect(self.missed_character_post_signal)
worker_signals_obj.file_successfully_downloaded_signal.disconnect(self.file_successfully_downloaded_signal)
except (TypeError, RuntimeError) as e:
self.logger(f" Note during DownloadThread signal disconnection: {e}")
# Emit the final signal with all collected results
self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames)
except (TypeError ,RuntimeError )as e :
self .logger (f" Note during DownloadThread signal disconnection: {e }")
self .finished_signal .emit (grand_total_downloaded_files ,grand_total_skipped_files ,self .isInterruptionRequested (),grand_list_of_kept_original_filenames )
def receive_add_character_result (self ,result ):
with QMutexLocker (self .prompt_mutex ):
self ._add_character_response =result

View File

@@ -144,7 +144,7 @@ class EmptyPopupDialog (QDialog ):
self .setMinimumSize (int (400 *scale_factor ),int (300 *scale_factor ))
self .parent_app =parent_app_ref
self .current_scope_mode =self .SCOPE_CHARACTERS
self.current_scope_mode = self.SCOPE_CREATORS
self .app_base_dir =app_base_dir
app_icon =get_app_icon_object ()

View File

@@ -126,6 +126,21 @@ class FavoriteArtistsDialog (QDialog ):
self .artist_list_widget .setVisible (show )
def _fetch_favorite_artists (self ):
if self.cookies_config['use_cookie']:
# Check if we can load cookies for at least one of the services.
kemono_cookies = prepare_cookies_for_request(True, self.cookies_config['cookie_text'], self.cookies_config['selected_cookie_file'], self.cookies_config['app_base_dir'], self._logger, target_domain="kemono.su")
coomer_cookies = prepare_cookies_for_request(True, self.cookies_config['cookie_text'], self.cookies_config['selected_cookie_file'], self.cookies_config['app_base_dir'], self._logger, target_domain="coomer.su")
if not kemono_cookies and not coomer_cookies:
# If cookies are enabled but none could be loaded, show help and stop.
self.status_label.setText(self._tr("fav_artists_cookies_required_status", "Error: Cookies enabled but could not be loaded for any source."))
self._logger("Error: Cookies enabled but no valid cookies were loaded. Showing help dialog.")
cookie_help_dialog = CookieHelpDialog(self.parent_app, self)
cookie_help_dialog.exec_()
self.download_button.setEnabled(False)
return # Stop further execution
kemono_fav_url ="https://kemono.su/api/v1/account/favorites?type=artist"
coomer_fav_url ="https://coomer.su/api/v1/account/favorites?type=artist"

View File

@@ -0,0 +1,83 @@
from PyQt5.QtWidgets import (
QDialog, QVBoxLayout, QRadioButton, QDialogButtonBox, QButtonGroup, QLabel, QComboBox, QHBoxLayout, QCheckBox
)
from PyQt5.QtCore import Qt
class MoreOptionsDialog(QDialog):
"""
A dialog for selecting a scope, export format, and single PDF option.
"""
SCOPE_CONTENT = "content"
SCOPE_COMMENTS = "comments"
def __init__(self, parent=None, current_scope=None, current_format=None, single_pdf_checked=False):
super().__init__(parent)
self.setWindowTitle("More Options")
self.setMinimumWidth(350)
# Build the dialog layout.
layout = QVBoxLayout(self)
self.description_label = QLabel("Please choose the scope for the action:")
layout.addWidget(self.description_label)
self.radio_button_group = QButtonGroup(self)
self.radio_content = QRadioButton("Description/Content")
self.radio_comments = QRadioButton("Comments")
self.radio_button_group.addButton(self.radio_content)
self.radio_button_group.addButton(self.radio_comments)
layout.addWidget(self.radio_content)
layout.addWidget(self.radio_comments)
if current_scope == self.SCOPE_COMMENTS:
self.radio_comments.setChecked(True)
else:
self.radio_content.setChecked(True)
export_layout = QHBoxLayout()
export_label = QLabel("Export as:")
self.format_combo = QComboBox()
self.format_combo.addItems(["PDF", "DOCX", "TXT"])
if current_format and current_format.upper() in ["PDF", "DOCX", "TXT"]:
self.format_combo.setCurrentText(current_format.upper())
else:
self.format_combo.setCurrentText("PDF")
export_layout.addWidget(export_label)
export_layout.addWidget(self.format_combo)
export_layout.addStretch()
layout.addLayout(export_layout)
# --- Single PDF Checkbox ---
self.single_pdf_checkbox = QCheckBox("Single PDF")
self.single_pdf_checkbox.setToolTip("If checked, all text from matching posts will be compiled into one single PDF file.")
self.single_pdf_checkbox.setChecked(single_pdf_checked)
layout.addWidget(self.single_pdf_checkbox)
self.format_combo.currentTextChanged.connect(self.update_single_pdf_checkbox_state)
self.update_single_pdf_checkbox_state(self.format_combo.currentText())
self.button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
self.button_box.accepted.connect(self.accept)
self.button_box.rejected.connect(self.reject)
layout.addWidget(self.button_box)
self.setLayout(layout)
def update_single_pdf_checkbox_state(self, text):
"""Enable the Single PDF checkbox only if the format is PDF."""
is_pdf = (text.upper() == "PDF")
self.single_pdf_checkbox.setEnabled(is_pdf)
if not is_pdf:
self.single_pdf_checkbox.setChecked(False)
def get_selected_scope(self):
if self.radio_comments.isChecked():
return self.SCOPE_COMMENTS
return self.SCOPE_CONTENT
def get_selected_format(self):
return self.format_combo.currentText().lower()
def get_single_pdf_state(self):
"""Returns the state of the Single PDF checkbox."""
return self.single_pdf_checkbox.isChecked() and self.single_pdf_checkbox.isEnabled()
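A minimal usage sketch for the dialog above (hypothetical caller; a QApplication must already exist):

    dialog = MoreOptionsDialog(parent=None, current_scope=MoreOptionsDialog.SCOPE_CONTENT,
                               current_format='pdf', single_pdf_checked=False)
    if dialog.exec_() == QDialog.Accepted:
        scope = dialog.get_selected_scope()            # 'content' or 'comments'
        export_format = dialog.get_selected_format()   # 'pdf', 'docx' or 'txt'
        single_pdf = dialog.get_single_pdf_state()     # True only when the format is PDF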

View File

@@ -0,0 +1,77 @@
# SinglePDF.py
import os
try:
from fpdf import FPDF
FPDF_AVAILABLE = True
except ImportError:
FPDF_AVAILABLE = False
FPDF = object  # Placeholder base class so this module can still be imported when fpdf2 is missing
class PDF(FPDF):
"""Custom PDF class to handle headers and footers."""
def header(self):
# No header
pass
def footer(self):
# Position at 1.5 cm from bottom
self.set_y(-15)
self.set_font('Helvetica', '', 8)  # Built-in core font, so the footer renders even if DejaVu fails to load
# Page number
self.cell(0, 10, 'Page ' + str(self.page_no()), 0, 0, 'C')
def create_single_pdf_from_content(posts_data, output_filename, font_path, logger=print):
"""
Creates a single PDF from a list of post titles and content.
Args:
posts_data (list): A list of dictionaries, where each dict has 'title' and 'content' keys.
output_filename (str): The full path for the output PDF file.
font_path (str): Path to the DejaVuSans.ttf font file.
logger (function, optional): A function to log progress and errors. Defaults to print.
"""
if not FPDF_AVAILABLE:
logger("❌ PDF Creation failed: 'fpdf2' library is not installed. Please run: pip install fpdf2")
return False
if not posts_data:
logger(" No text content was collected to create a PDF.")
return False
pdf = PDF()
body_font = 'DejaVu'  # Switched to a built-in core font below if the TTF cannot be loaded
try:
if not os.path.exists(font_path):
raise RuntimeError("Font file not found.")
pdf.add_font('DejaVu', '', font_path, uni=True)
pdf.add_font('DejaVu', 'B', font_path, uni=True)  # Register the same file for the bold style so 'B' calls don't fail
except Exception as font_error:
logger(f" ⚠️ Could not load DejaVu font: {font_error}")
logger(" PDF may not support all characters. Falling back to the built-in Arial font.")
body_font = 'Arial'
logger(f" Starting PDF creation with content from {len(posts_data)} posts...")
for post in posts_data:
pdf.add_page()
# Post Title
pdf.set_font(body_font, 'B', 16)
# Explicit keyword arguments; the title is left-aligned.
pdf.multi_cell(w=0, h=10, text=post.get('title', 'Untitled Post'), align='L')
pdf.ln(5) # Add a little space after the title
# Post Content
pdf.set_font(body_font, '', 12)
pdf.multi_cell(w=0, h=7, text=post.get('content', 'No Content'))
try:
pdf.output(output_filename)
logger(f"✅ Successfully created single PDF: '{os.path.basename(output_filename)}'")
return True
except Exception as e:
logger(f"❌ A critical error occurred while saving the final PDF: {e}")
return False
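A minimal usage sketch for create_single_pdf_from_content (hypothetical paths and data; the font path
mirrors the data/dejavu-sans location used by the worker):

    posts = [
        {'title': 'Chapter 1', 'content': 'It was a dark night...'},
        {'title': 'Chapter 2', 'content': 'Morning came quickly.'},
    ]
    ok = create_single_pdf_from_content(
        posts,
        output_filename='compiled_posts.pdf',
        font_path='data/dejavu-sans/DejaVuSans.ttf',
        logger=print,
    )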

File diff suppressed because it is too large.