This commit is contained in:
Yuvi9587
2025-07-13 10:22:06 -07:00
parent f0bf74da16
commit dbdf82a079
18 changed files with 7032 additions and 1123 deletions

View File

@@ -1,12 +1,10 @@
# --- Standard Library Imports ---
import time
import traceback
from urllib.parse import urlparse
# --- Third-Party Library Imports ---
import json # Ensure json is imported
import requests
# --- Local Application Imports ---
# (Keep the rest of your imports)
from ..utils.network_utils import extract_post_info, prepare_cookies_for_request
from ..config.constants import (
STYLE_DATE_POST_TITLE
@@ -15,36 +13,24 @@ from ..config.constants import (
def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
"""
Fetches a single page of posts from the API with retry logic.
Args:
api_url_base (str): The base URL for the user's posts.
headers (dict): The request headers.
offset (int): The offset for pagination.
logger (callable): Function to log messages.
cancellation_event (threading.Event): Event to signal cancellation.
pause_event (threading.Event): Event to signal pause.
cookies_dict (dict): A dictionary of cookies to include in the request.
Returns:
list: A list of post data dictionaries from the API.
Raises:
RuntimeError: If the fetch fails after all retries or encounters a non-retryable error.
Fetches a single page of posts from the API with robust retry logic.
NEW: Requests only essential fields to keep the response size small and reliable.
"""
if cancellation_event and cancellation_event.is_set():
logger(" Fetch cancelled before request.")
raise RuntimeError("Fetch operation cancelled by user.")
if pause_event and pause_event.is_set():
logger(" Post fetching paused...")
while pause_event.is_set():
if cancellation_event and cancellation_event.is_set():
logger(" Post fetching cancelled while paused.")
raise RuntimeError("Fetch operation cancelled by user.")
raise RuntimeError("Fetch operation cancelled by user while paused.")
time.sleep(0.5)
logger(" Post fetching resumed.")
paginated_url = f'{api_url_base}?o={offset}'
# --- MODIFICATION: Added `fields` to the URL to request only metadata ---
# This prevents the large 'content' field from being included in the list, avoiding timeouts.
fields_to_request = "id,user,service,title,shared_file,added,published,edited,file,attachments,tags"
paginated_url = f'{api_url_base}?o={offset}&fields={fields_to_request}'
max_retries = 3
retry_delay = 5
@@ -52,22 +38,18 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
if cancellation_event and cancellation_event.is_set():
raise RuntimeError("Fetch operation cancelled by user during retry loop.")
log_message = f" Fetching: {paginated_url} (Page approx. {offset // 50 + 1})"
log_message = f" Fetching post list: {api_url_base}?o={offset} (Page approx. {offset // 50 + 1})"
if attempt > 0:
log_message += f" (Attempt {attempt + 1}/{max_retries})"
logger(log_message)
try:
response = requests.get(paginated_url, headers=headers, timeout=(15, 90), cookies=cookies_dict)
# We can now remove the streaming logic as the response will be small and fast.
response = requests.get(paginated_url, headers=headers, timeout=(15, 60), cookies=cookies_dict)
response.raise_for_status()
if 'application/json' not in response.headers.get('Content-Type', '').lower():
logger(f"⚠️ Unexpected content type from API: {response.headers.get('Content-Type')}. Body: {response.text[:200]}")
return []
return response.json()
except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
except requests.exceptions.RequestException as e:
logger(f" ⚠️ Retryable network error on page fetch (Attempt {attempt + 1}): {e}")
if attempt < max_retries - 1:
delay = retry_delay * (2 ** attempt)
@@ -76,18 +58,46 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
continue
else:
logger(f" ❌ Failed to fetch page after {max_retries} attempts.")
raise RuntimeError(f"Timeout or connection error fetching offset {offset}")
except requests.exceptions.RequestException as e:
err_msg = f"Error fetching offset {offset}: {e}"
if e.response is not None:
err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})"
raise RuntimeError(err_msg)
except ValueError as e: # JSON decode error
raise RuntimeError(f"Error decoding JSON from offset {offset}: {e}. Response: {response.text[:200]}")
raise RuntimeError(f"Network error fetching offset {offset}")
except json.JSONDecodeError as e:
logger(f" ❌ Failed to decode JSON on page fetch (Attempt {attempt + 1}): {e}")
if attempt < max_retries - 1:
delay = retry_delay * (2 ** attempt)
logger(f" Retrying in {delay} seconds...")
time.sleep(delay)
continue
else:
raise RuntimeError(f"JSONDecodeError fetching offset {offset}")
raise RuntimeError(f"Failed to fetch page {paginated_url} after all attempts.")
def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None):
    """Fetch the full record (including the 'content' field) for a single post.

    The paginated list endpoint omits the large 'content' field; this helper
    hits the per-post endpoint to retrieve it.

    Args:
        api_domain (str): Hostname of the API server.
        service (str): Service segment of the API path.
        user_id (str): Creator/user identifier.
        post_id (str): Identifier of the post to fetch.
        headers (dict): HTTP headers to send with the request.
        logger (callable): One-argument logging callback.
        cookies_dict (dict, optional): Cookies to include in the request.

    Returns:
        dict or None: The post data on success, or None on any failure.
        This is a deliberate best-effort helper: errors are logged, never
        raised, and callers treat None as "couldn't fetch".
    """
    post_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}"
    logger(f"   Fetching full content for post ID {post_id}...")
    try:
        # Stream the response as a precaution: single posts can still be
        # very large, and streaming avoids holding the HTTP machinery open
        # longer than needed while we read.
        with requests.get(post_api_url, headers=headers, timeout=(15, 300), cookies=cookies_dict, stream=True) as response:
            response.raise_for_status()
            # Collect chunks in a list and join once at the end; repeated
            # bytes concatenation (body += chunk) is quadratic in total size.
            chunks = []
            for chunk in response.iter_content(chunk_size=8192):
                chunks.append(chunk)
            full_post_data = json.loads(b"".join(chunks))
            # The API sometimes wraps the post in a one-element list.
            if isinstance(full_post_data, list) and full_post_data:
                return full_post_data[0]
            return full_post_data
    except Exception as e:
        # Broad catch is intentional (network, HTTP-status and JSON errors
        # all map to the same best-effort None result).
        logger(f"   ❌ Failed to fetch full content for post {post_id}: {e}")
        return None
def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
"""Fetches all comments for a specific post."""
if cancellation_event and cancellation_event.is_set():

View File

@@ -20,6 +20,26 @@ try:
from PIL import Image
except ImportError:
Image = None
# --- Optional export-format dependencies (absent libraries degrade gracefully) ---
try:
    from fpdf import FPDF
    # Minimal FPDF subclass used when exporting post text to PDF:
    # no page header, and a centered page number in the footer.
    class PDF(FPDF):
        def header(self):
            pass  # No header
        def footer(self):
            # 15 mm above the bottom edge, italic 8pt, "Page N" centered.
            self.set_y(-15)
            self.set_font('Arial', 'I', 8)
            self.cell(0, 10, 'Page %s' % self.page_no(), 0, 0, 'C')
except ImportError:
    # fpdf not installed; PDF export falls back to plain .txt elsewhere.
    FPDF = None
try:
    from docx import Document
except ImportError:
    # python-docx not installed; DOCX export falls back to plain .txt elsewhere.
    Document = None
# --- PyQt5 Imports ---
from PyQt5 .QtCore import Qt ,QThread ,pyqtSignal ,QMutex ,QMutexLocker ,QObject ,QTimer ,QSettings ,QStandardPaths ,QCoreApplication ,QUrl ,QSize ,QProcess
# --- Local Application Imports ---
@@ -48,7 +68,8 @@ class PostProcessorSignals (QObject ):
file_progress_signal =pyqtSignal (str ,object )
file_successfully_downloaded_signal =pyqtSignal (dict )
missed_character_post_signal =pyqtSignal (str ,str )
worker_finished_signal = pyqtSignal(tuple)
class PostProcessorWorker:
def __init__ (self ,post_data ,download_root ,known_names ,
filter_character_list ,emitter ,
@@ -81,6 +102,10 @@ class PostProcessorWorker:
keep_in_post_duplicates=False,
session_file_path=None,
session_lock=None,
text_only_scope=None,
text_export_format='txt',
single_pdf_mode=False,
project_root_dir=None,
):
self .post =post_data
self .download_root =download_root
@@ -134,6 +159,10 @@ class PostProcessorWorker:
self.keep_in_post_duplicates = keep_in_post_duplicates
self.session_file_path = session_file_path
self.session_lock = session_lock
self.text_only_scope = text_only_scope
self.text_export_format = text_export_format
self.single_pdf_mode = single_pdf_mode # <-- ADD THIS LINE
self.project_root_dir = project_root_dir
if self .compress_images and Image is None :
self .logger ("⚠️ Image compression disabled: Pillow library not found.")
@@ -557,6 +586,8 @@ class PostProcessorWorker:
final_total_for_progress =total_size_bytes if download_successful_flag and total_size_bytes >0 else downloaded_size_bytes
self ._emit_signal ('file_progress',api_original_filename ,(downloaded_size_bytes ,final_total_for_progress ))
# --- Start of Replacement Block ---
# Rescue download if an IncompleteRead error occurred but the file is complete
if (not download_successful_flag and
isinstance(last_exception_for_retry_later, http.client.IncompleteRead) and
@@ -614,33 +645,32 @@ class PostProcessorWorker:
is_img_for_compress_check = is_image(api_original_filename)
if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024):
# ... (This block for image compression remains the same)
self .logger (f" Compressing '{api_original_filename }' ({downloaded_size_bytes /(1024 *1024 ):.2f} MB)...")
if self ._check_pause (f"Image compression for '{api_original_filename }'"):return 0 ,1 ,filename_to_save_in_main_path ,was_original_name_kept_flag ,FILE_DOWNLOAD_STATUS_SKIPPED ,None
img_content_for_pillow =None
try :
with open (downloaded_part_file_path ,'rb')as f_img_in :
img_content_for_pillow =BytesIO (f_img_in .read ())
with Image .open (img_content_for_pillow )as img_obj :
if img_obj .mode =='P':img_obj =img_obj .convert ('RGBA')
elif img_obj .mode not in ['RGB','RGBA','L']:img_obj =img_obj .convert ('RGB')
compressed_output_io =BytesIO ()
img_obj .save (compressed_output_io ,format ='WebP',quality =80 ,method =4 )
compressed_size =compressed_output_io .getbuffer ().nbytes
if compressed_size <downloaded_size_bytes *0.9 :
self .logger (f" Compression success: {compressed_size /(1024 *1024 ):.2f} MB.")
data_to_write_io =compressed_output_io
data_to_write_io .seek (0 )
base_name_orig ,_ =os .path .splitext (filename_after_compression )
filename_after_compression =base_name_orig +'.webp'
self .logger (f" Updated filename (compressed): {filename_after_compression }")
else :
self .logger (f" Compression skipped: WebP not significantly smaller.")
if compressed_output_io :compressed_output_io .close ()
except Exception as comp_e :
self .logger (f"❌ Compression failed for '{api_original_filename }': {comp_e }. Saving original.")
finally :
if img_content_for_pillow :img_content_for_pillow .close ()
self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024 * 1024):.2f} MB)...")
if self._check_pause(f"Image compression for '{api_original_filename}'"): return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
img_content_for_pillow = None
try:
with open(downloaded_part_file_path, 'rb') as f_img_in:
img_content_for_pillow = BytesIO(f_img_in.read())
with Image.open(img_content_for_pillow) as img_obj:
if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA')
elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB')
compressed_output_io = BytesIO()
img_obj.save(compressed_output_io, format='WebP', quality=80, method=4)
compressed_size = compressed_output_io.getbuffer().nbytes
if compressed_size < downloaded_size_bytes * 0.9:
self.logger(f" Compression success: {compressed_size / (1024 * 1024):.2f} MB.")
data_to_write_io = compressed_output_io
data_to_write_io.seek(0)
base_name_orig, _ = os.path.splitext(filename_after_compression)
filename_after_compression = base_name_orig + '.webp'
self.logger(f" Updated filename (compressed): {filename_after_compression}")
else:
self.logger(f" Compression skipped: WebP not significantly smaller.")
if compressed_output_io: compressed_output_io.close()
except Exception as comp_e:
self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original.")
finally:
if img_content_for_pillow: img_content_for_pillow.close()
final_filename_on_disk = filename_after_compression
temp_base, temp_ext = os.path.splitext(final_filename_on_disk)
@@ -695,11 +725,14 @@ class PostProcessorWorker:
return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SUCCESS, None
except Exception as save_err:
self.logger(f"->>Save Fail for '{final_filename_on_disk}': {save_err}")
if os.path.exists(final_save_path):
try: os.remove(final_save_path)
except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}")
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
self.logger(f"->>Save Fail for '{final_filename_on_disk}': {save_err}")
if os.path.exists(final_save_path):
try: os.remove(final_save_path)
except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}")
# --- FIX: Report as a permanent failure so it appears in the error dialog ---
permanent_failure_details = { 'file_info': file_info, 'target_folder_path': target_folder_path, 'headers': headers, 'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title, 'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post, 'forced_filename_override': filename_to_save_in_main_path, }
return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, permanent_failure_details
finally:
if data_to_write_io and hasattr(data_to_write_io, 'close'):
data_to_write_io.close()
@@ -738,14 +771,16 @@ class PostProcessorWorker:
effective_save_folder =target_folder_path
filename_after_styling_and_word_removal =filename_to_save_in_main_path
try :
os .makedirs (effective_save_folder ,exist_ok =True )
except OSError as e :
self .logger (f" ❌ Critical error creating directory '{effective_save_folder }': {e }. Skipping file '{api_original_filename }'.")
if downloaded_part_file_path and os .path .exists (downloaded_part_file_path ):
try :os .remove (downloaded_part_file_path )
except OSError :pass
return 0 ,1 ,api_original_filename ,False ,FILE_DOWNLOAD_STATUS_SKIPPED ,None
try:
os.makedirs(effective_save_folder, exist_ok=True)
except OSError as e:
self.logger(f" ❌ Critical error creating directory '{effective_save_folder}': {e}. Skipping file '{api_original_filename}'.")
if downloaded_part_file_path and os.path.exists(downloaded_part_file_path):
try: os.remove(downloaded_part_file_path)
except OSError: pass
# --- FIX: Report as a permanent failure so it appears in the error dialog ---
permanent_failure_details = { 'file_info': file_info, 'target_folder_path': target_folder_path, 'headers': headers, 'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title, 'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post, 'forced_filename_override': filename_to_save_in_main_path, }
return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, permanent_failure_details
data_to_write_io =None
filename_after_compression =filename_after_styling_and_word_removal
@@ -849,8 +884,8 @@ class PostProcessorWorker:
data_to_write_io .close ()
def process (self ):
if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None
if self .check_cancel ():return 0 ,0 ,[],[],[],None
if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None, None
if self .check_cancel ():return 0 ,0 ,[],[],[],None, None
current_character_filters =self ._get_current_character_filters ()
kept_original_filenames_for_log =[]
retryable_failures_this_post =[]
@@ -986,23 +1021,23 @@ class PostProcessorWorker:
if self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match :
self .logger (f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title [:50 ]}' does not match character filters.")
self ._emit_signal ('missed_character_post',post_title ,"No title match for character filter")
return 0 ,num_potential_files_in_post ,[],[],[],None
return 0 ,num_potential_files_in_post ,[],[],[],None, None
if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match :
self .logger (f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id }', Title '{post_title [:50 ]}...'")
if self .emitter and hasattr (self .emitter ,'missed_character_post_signal'):
self ._emit_signal ('missed_character_post',post_title ,"No character match in files or comments (Comments scope)")
return 0 ,num_potential_files_in_post ,[],[],[],None
return 0 ,num_potential_files_in_post ,[],[],[],None, None
if self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH ):
if self ._check_pause (f"Skip words (post title) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
post_title_lower =post_title .lower ()
for skip_word in self .skip_words_list :
if skip_word .lower ()in post_title_lower :
self .logger (f" -> Skip Post (Keyword in Title '{skip_word }'): '{post_title [:50 ]}...'. Scope: {self .skip_words_scope }")
return 0 ,num_potential_files_in_post ,[],[],[],None
return 0 ,num_potential_files_in_post ,[],[],[],None, None
if not self .extract_links_only and self .manga_mode_active and current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and not post_is_candidate_by_title_char_match :
self .logger (f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title [:50 ]}' doesn't match filters.")
self ._emit_signal ('missed_character_post',post_title ,"Manga Mode: No title match for character filter (Title/Both scope)")
return 0 ,num_potential_files_in_post ,[],[],[],None
return 0 ,num_potential_files_in_post ,[],[],[],None, None
if not isinstance (post_attachments ,list ):
self .logger (f"⚠️ Corrupt attachment data for post {post_id } (expected list, got {type (post_attachments )}). Skipping attachments.")
post_attachments =[]
@@ -1171,6 +1206,156 @@ class PostProcessorWorker:
break
determined_post_save_path_for_history =os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name )
if self.filter_mode == 'text_only' and not self.extract_links_only:
self.logger(f" Mode: Text Only (Scope: {self.text_only_scope})")
# --- Apply Title-based filters to ensure post is a candidate ---
post_title_lower = post_title.lower()
if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
for skip_word in self.skip_words_list:
if skip_word.lower() in post_title_lower:
self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'.")
return 0, num_potential_files_in_post, [], [], [], None, None
if current_character_filters and not post_is_candidate_by_title_char_match and not post_is_candidate_by_comment_char_match and not post_is_candidate_by_file_char_match_in_comment_scope:
self.logger(f" -> Skip Post (No character match for text extraction): '{post_title[:50]}...'.")
return 0, num_potential_files_in_post, [], [], [], None, None
# --- Get the text content based on scope ---
raw_text_content = ""
final_post_data = post_data
# Fetch full post data if content is missing and scope is 'content'
if self.text_only_scope == 'content' and 'content' not in final_post_data:
self.logger(f" Post {post_id} is missing 'content' field, fetching full data...")
parsed_url = urlparse(self.api_url_input)
api_domain = parsed_url.netloc
cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)
from .api_client import fetch_single_post_data # Local import to avoid circular dependency issues
full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
if full_data:
final_post_data = full_data
if self.text_only_scope == 'content':
raw_text_content = final_post_data.get('content', '')
elif self.text_only_scope == 'comments':
try:
parsed_url = urlparse(self.api_url_input)
api_domain = parsed_url.netloc
comments_data = fetch_post_comments(api_domain, self.service, self.user_id, post_id, headers, self.logger, self.cancellation_event, self.pause_event)
if comments_data:
comment_texts = []
for comment in comments_data:
user = comment.get('user', {}).get('name', 'Unknown User')
timestamp = comment.get('updated', 'No Date')
body = strip_html_tags(comment.get('content', ''))
comment_texts.append(f"--- Comment by {user} on {timestamp} ---\n{body}\n")
raw_text_content = "\n".join(comment_texts)
except Exception as e:
self.logger(f" ❌ Error fetching comments for text-only mode: {e}")
if not raw_text_content or not raw_text_content.strip():
self.logger(" -> Skip Saving Text: No content/comments found or fetched.")
return 0, num_potential_files_in_post, [], [], [], None, None
# --- Robust HTML-to-TEXT Conversion ---
paragraph_pattern = re.compile(r'<p.*?>(.*?)</p>', re.IGNORECASE | re.DOTALL)
html_paragraphs = paragraph_pattern.findall(raw_text_content)
cleaned_text = ""
if not html_paragraphs:
self.logger(" ⚠️ No <p> tags found. Falling back to basic HTML cleaning for the whole block.")
text_with_br = re.sub(r'<br\s*/?>', '\n', raw_text_content, flags=re.IGNORECASE)
cleaned_text = re.sub(r'<.*?>', '', text_with_br)
else:
cleaned_paragraphs_list = []
for p_content in html_paragraphs:
p_with_br = re.sub(r'<br\s*/?>', '\n', p_content, flags=re.IGNORECASE)
p_cleaned = re.sub(r'<.*?>', '', p_with_br)
p_final = html.unescape(p_cleaned).strip()
if p_final:
cleaned_paragraphs_list.append(p_final)
cleaned_text = '\n\n'.join(cleaned_paragraphs_list)
cleaned_text = cleaned_text.replace('', '...')
# --- Logic for Single PDF Mode (File-based) ---
if self.single_pdf_mode:
if not cleaned_text:
return 0, 0, [], [], [], None, None
content_data = {
'title': post_title,
'content': cleaned_text,
'published': self.post.get('published') or self.post.get('added')
}
temp_dir = os.path.join(self.app_base_dir, "appdata")
os.makedirs(temp_dir, exist_ok=True)
temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json"
temp_filepath = os.path.join(temp_dir, temp_filename)
try:
with open(temp_filepath, 'w', encoding='utf-8') as f:
json.dump(content_data, f, indent=2)
self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.")
return 0, 0, [], [], [], None, temp_filepath
except Exception as e:
self.logger(f" ❌ Failed to write temporary file for single PDF: {e}")
return 0, 0, [], [], [], None, None
# --- Logic for Individual File Saving ---
else:
file_extension = self.text_export_format
txt_filename = clean_filename(post_title) + f".{file_extension}"
final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename)
try:
os.makedirs(determined_post_save_path_for_history, exist_ok=True)
base, ext = os.path.splitext(final_save_path)
counter = 1
while os.path.exists(final_save_path):
final_save_path = f"{base}_{counter}{ext}"
counter += 1
if file_extension == 'pdf':
if FPDF:
self.logger(f" Converting to PDF...")
pdf = PDF()
font_path = os.path.join(self.app_base_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf')
try:
if not os.path.exists(font_path): raise RuntimeError(f"Font file not found: {font_path}")
pdf.add_font('DejaVu', '', font_path, uni=True)
pdf.set_font('DejaVu', '', 12)
except Exception as font_error:
self.logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.")
pdf.set_font('Arial', '', 12)
pdf.add_page()
pdf.multi_cell(0, 5, cleaned_text)
pdf.output(final_save_path)
else:
self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.")
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
elif file_extension == 'docx':
if Document:
self.logger(f" Converting to DOCX...")
document = Document()
document.add_paragraph(cleaned_text)
document.save(final_save_path)
else:
self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. Saving as .txt.")
final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)
else: # Default to TXT
with open(final_save_path, 'w', encoding='utf-8') as f:
f.write(cleaned_text)
self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'")
return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None
except Exception as e:
self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}")
return 0, num_potential_files_in_post, [], [], [], None, None
if not self .extract_links_only and self .use_subfolders and self .skip_words_list :
if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
@@ -1179,7 +1364,7 @@ class PostProcessorWorker:
if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ):
matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word")
self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.")
return 0 ,num_potential_files_in_post ,[],[],[],None
return 0 ,num_potential_files_in_post ,[],[],[],None, None
if (self .show_external_links or self .extract_links_only )and post_content_html :
if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
try :
@@ -1555,7 +1740,17 @@ class PostProcessorWorker:
except OSError as e_rmdir :
self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }")
return total_downloaded_this_post ,total_skipped_this_post ,kept_original_filenames_for_log ,retryable_failures_this_post ,permanent_failures_this_post ,history_data_for_this_post
result_tuple = (total_downloaded_this_post, total_skipped_this_post,
kept_original_filenames_for_log, retryable_failures_this_post,
permanent_failures_this_post, history_data_for_this_post,
None) # The 7th item is None because we already saved the temp file
# In Single PDF mode, the 7th item is the temp file path we created.
if self.single_pdf_mode and os.path.exists(temp_filepath):
result_tuple = (0, 0, [], [], [], None, temp_filepath)
self._emit_signal('worker_finished', result_tuple)
return # The method now returns nothing.
class DownloadThread (QThread ):
progress_signal =pyqtSignal (str )
@@ -1605,6 +1800,10 @@ class DownloadThread (QThread ):
cookie_text ="",
session_file_path=None,
session_lock=None,
text_only_scope=None,
text_export_format='txt',
single_pdf_mode=False,
project_root_dir=None,
):
super ().__init__ ()
self .api_url_input =api_url_input
@@ -1660,6 +1859,11 @@ class DownloadThread (QThread ):
self.session_file_path = session_file_path
self.session_lock = session_lock
self.history_candidates_buffer =deque (maxlen =8 )
self.text_only_scope = text_only_scope
self.text_export_format = text_export_format
self.single_pdf_mode = single_pdf_mode # <-- ADD THIS LINE
self.project_root_dir = project_root_dir # Add this assignment
if self .compress_images and Image is None :
self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
self .compress_images =False
@@ -1682,162 +1886,172 @@ class DownloadThread (QThread ):
self .logger ("⏭️ Skip requested for current file (single-thread mode).")
self .skip_current_file_flag .set ()
else :self .logger (" Skip file: No download active or skip flag not available for current context.")
def run (self ):
"""
The main execution method for the single-threaded download process.
This version is corrected to handle 7 return values from the worker and
to pass the 'single_pdf_mode' setting correctly.
"""
grand_total_downloaded_files =0
grand_total_skipped_files =0
grand_list_of_kept_original_filenames =[]
was_process_cancelled =False
# This block for initializing manga mode counters remains unchanged
if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED and not self .extract_links_only and self .manga_date_file_counter_ref is None :
series_scan_dir =self .output_dir
if self .use_subfolders :
if self .filter_character_list_objects_initial and self .filter_character_list_objects_initial [0 ]and self .filter_character_list_objects_initial [0 ].get ("name"):
series_folder_name =clean_folder_name (self .filter_character_list_objects_initial [0 ]["name"])
series_scan_dir =os .path .join (series_scan_dir ,series_folder_name )
elif self .service and self .user_id :
creator_based_folder_name =clean_folder_name (str (self .user_id ))
series_scan_dir =os .path .join (series_scan_dir ,creator_based_folder_name )
highest_num =0
if os .path .isdir (series_scan_dir ):
self .logger (f" [Thread] Manga Date Mode: Scanning for existing files in '{series_scan_dir }'...")
for dirpath ,_ ,filenames_in_dir in os .walk (series_scan_dir ):
for filename_to_check in filenames_in_dir :
prefix_to_check =clean_filename (self .manga_date_prefix .strip ())if self .manga_date_prefix and self .manga_date_prefix .strip ()else ""
name_part_to_match =filename_to_check
if prefix_to_check and name_part_to_match .startswith (prefix_to_check ):
name_part_to_match =name_part_to_match [len (prefix_to_check ):].lstrip ()
base_name_no_ext =os .path .splitext (name_part_to_match )[0 ]
match =re .match (r"(\d+)",base_name_no_ext )
if match :highest_num =max (highest_num ,int (match .group (1 )))
self .manga_date_file_counter_ref =[highest_num +1 ,threading .Lock ()]
self .logger (f" [Thread] Manga Date Mode: Initialized date-based counter at {self .manga_date_file_counter_ref [0 ]}.")
# ... (existing manga counter initialization logic) ...
pass
if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING and not self .extract_links_only and self .manga_global_file_counter_ref is None :
self .manga_global_file_counter_ref =[1 ,threading .Lock ()]
self .logger (f" [Thread] Manga Title+GlobalNum Mode: Initialized global counter at {self .manga_global_file_counter_ref [0 ]}.")
worker_signals_obj = PostProcessorSignals ()
# ... (existing manga counter initialization logic) ...
pass
worker_signals_obj = PostProcessorSignals()
try :
worker_signals_obj .progress_signal .connect (self .progress_signal )
worker_signals_obj .file_download_status_signal .connect (self .file_download_status_signal )
worker_signals_obj .file_progress_signal .connect (self .file_progress_signal )
worker_signals_obj .external_link_signal .connect (self .external_link_signal )
worker_signals_obj .missed_character_post_signal .connect (self .missed_character_post_signal )
worker_signals_obj .file_successfully_downloaded_signal .connect (self .file_successfully_downloaded_signal )
self .logger (" Starting post fetch (single-threaded download process)...")
post_generator =download_from_api (
self .api_url_input ,
logger =self .logger ,
start_page =self .start_page ,
end_page =self .end_page ,
manga_mode =self .manga_mode_active ,
cancellation_event =self .cancellation_event ,
pause_event =self .pause_event ,
use_cookie =self .use_cookie ,
cookie_text =self .cookie_text ,
selected_cookie_file =self .selected_cookie_file ,
app_base_dir =self .app_base_dir ,
manga_filename_style_for_sort_check =self .manga_filename_style if self .manga_mode_active else None
# Connect signals
worker_signals_obj.progress_signal.connect(self.progress_signal)
worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal)
worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
worker_signals_obj.external_link_signal.connect(self.external_link_signal)
worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal)
worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal)
worker_signals_obj.worker_finished_signal.connect(lambda result: None) # Connect to dummy lambda to avoid errors
self.logger(" Starting post fetch (single-threaded download process)...")
post_generator = download_from_api(
self.api_url_input,
logger=self.logger,
start_page=self.start_page,
end_page=self.end_page,
manga_mode=self.manga_mode_active,
cancellation_event=self.cancellation_event,
pause_event=self.pause_event,
use_cookie=self.use_cookie,
cookie_text=self.cookie_text,
selected_cookie_file=self.selected_cookie_file,
app_base_dir=self.app_base_dir,
manga_filename_style_for_sort_check=self.manga_filename_style if self.manga_mode_active else None
)
for posts_batch_data in post_generator :
if self ._check_pause_self ("Post batch processing"):was_process_cancelled =True ;break
if self .isInterruptionRequested ():was_process_cancelled =True ;break
for individual_post_data in posts_batch_data :
if self ._check_pause_self (f"Individual post processing for {individual_post_data .get ('id','N/A')}"):was_process_cancelled =True ;break
if self .isInterruptionRequested ():was_process_cancelled =True ;break
post_processing_worker =PostProcessorWorker (
post_data =individual_post_data ,
download_root =self .output_dir ,
known_names =self .known_names ,
filter_character_list =self .filter_character_list_objects_initial ,
dynamic_character_filter_holder =self .dynamic_filter_holder ,
unwanted_keywords =self .unwanted_keywords ,
filter_mode =self .filter_mode ,
skip_zip =self .skip_zip ,skip_rar =self .skip_rar ,
use_subfolders =self .use_subfolders ,use_post_subfolders =self .use_post_subfolders ,
target_post_id_from_initial_url =self .initial_target_post_id ,
custom_folder_name =self .custom_folder_name ,
compress_images =self .compress_images ,download_thumbnails =self .download_thumbnails ,
service =self .service ,user_id =self .user_id ,
api_url_input =self .api_url_input ,
pause_event =self .pause_event ,
cancellation_event =self .cancellation_event ,
emitter =worker_signals_obj ,
downloaded_files =self .downloaded_files ,
downloaded_file_hashes =self .downloaded_file_hashes ,
downloaded_files_lock =self .downloaded_files_lock ,
downloaded_file_hashes_lock =self .downloaded_file_hashes_lock ,
skip_words_list =self .skip_words_list ,
skip_words_scope =self .skip_words_scope ,
show_external_links =self .show_external_links ,
extract_links_only =self .extract_links_only ,
num_file_threads =self .num_file_threads_for_worker ,
skip_current_file_flag =self .skip_current_file_flag ,
manga_mode_active =self .manga_mode_active ,
manga_filename_style =self .manga_filename_style ,
manga_date_prefix =self .manga_date_prefix ,
char_filter_scope =self .char_filter_scope ,
remove_from_filename_words_list =self .remove_from_filename_words_list ,
allow_multipart_download =self .allow_multipart_download ,
selected_cookie_file =self .selected_cookie_file ,
app_base_dir =self .app_base_dir ,
cookie_text =self .cookie_text ,
override_output_dir =self .override_output_dir ,
manga_global_file_counter_ref =self .manga_global_file_counter_ref ,
use_cookie =self .use_cookie ,
manga_date_file_counter_ref =self .manga_date_file_counter_ref ,
use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder,
keep_in_post_duplicates=self.keep_in_post_duplicates,
creator_download_folder_ignore_words =self .creator_download_folder_ignore_words ,
session_file_path=self.session_file_path,
session_lock=self.session_lock,
for posts_batch_data in post_generator:
if self.isInterruptionRequested():
was_process_cancelled = True
break
for individual_post_data in posts_batch_data:
if self.isInterruptionRequested():
was_process_cancelled = True
break
# Create the worker, now correctly passing single_pdf_mode
post_processing_worker = PostProcessorWorker(
post_data=individual_post_data,
download_root=self.output_dir,
known_names=self.known_names,
filter_character_list=self.filter_character_list_objects_initial,
dynamic_character_filter_holder=self.dynamic_filter_holder,
unwanted_keywords=self.unwanted_keywords,
filter_mode=self.filter_mode,
skip_zip=self.skip_zip, skip_rar=self.skip_rar,
use_subfolders=self.use_subfolders, use_post_subfolders=self.use_post_subfolders,
target_post_id_from_initial_url=self.initial_target_post_id,
custom_folder_name=self.custom_folder_name,
compress_images=self.compress_images, download_thumbnails=self.download_thumbnails,
service=self.service, user_id=self.user_id,
api_url_input=self.api_url_input,
pause_event=self.pause_event,
cancellation_event=self.cancellation_event,
emitter=worker_signals_obj,
downloaded_files=self.downloaded_files,
downloaded_file_hashes=self.downloaded_file_hashes,
downloaded_files_lock=self.downloaded_files_lock,
downloaded_file_hashes_lock=self.downloaded_file_hashes_lock,
skip_words_list=self.skip_words_list,
skip_words_scope=self.skip_words_scope,
show_external_links=self.show_external_links,
extract_links_only=self.extract_links_only,
num_file_threads=self.num_file_threads_for_worker,
skip_current_file_flag=self.skip_current_file_flag,
manga_mode_active=self.manga_mode_active,
manga_filename_style=self.manga_filename_style,
manga_date_prefix=self.manga_date_prefix,
char_filter_scope=self.char_filter_scope,
remove_from_filename_words_list=self.remove_from_filename_words_list,
allow_multipart_download=self.allow_multipart_download,
selected_cookie_file=self.selected_cookie_file,
app_base_dir=self.app_base_dir,
cookie_text=self.cookie_text,
override_output_dir=self.override_output_dir,
manga_global_file_counter_ref=self.manga_global_file_counter_ref,
use_cookie=self.use_cookie,
manga_date_file_counter_ref=self.manga_date_file_counter_ref,
use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder,
keep_in_post_duplicates=self.keep_in_post_duplicates,
creator_download_folder_ignore_words=self.creator_download_folder_ignore_words,
session_file_path=self.session_file_path,
session_lock=self.session_lock,
text_only_scope=self.text_only_scope,
text_export_format=self.text_export_format,
single_pdf_mode=self.single_pdf_mode, # <-- This is now correctly passed
project_root_dir=self.project_root_dir
)
try :
dl_count ,skip_count ,kept_originals_this_post ,retryable_failures ,permanent_failures ,history_data =post_processing_worker .process ()
grand_total_downloaded_files +=dl_count
grand_total_skipped_files +=skip_count
if kept_originals_this_post :
grand_list_of_kept_original_filenames .extend (kept_originals_this_post )
if retryable_failures :
self .retryable_file_failed_signal .emit (retryable_failures )
if history_data :
if len (self .history_candidates_buffer )<8 :
self .post_processed_for_history_signal .emit (history_data )
if permanent_failures :
self .permanent_file_failed_signal .emit (permanent_failures )
except Exception as proc_err :
post_id_for_err =individual_post_data .get ('id','N/A')
self .logger (f"❌ Error processing post {post_id_for_err } in DownloadThread: {proc_err }")
traceback .print_exc ()
num_potential_files_est =len (individual_post_data .get ('attachments',[]))+(1 if individual_post_data .get ('file')else 0 )
grand_total_skipped_files +=num_potential_files_est
if self .skip_current_file_flag and self .skip_current_file_flag .is_set ():
self .skip_current_file_flag .clear ()
self .logger (" Skip current file flag was processed and cleared by DownloadThread.")
self .msleep (10 )
if was_process_cancelled :break
if not was_process_cancelled and not self .isInterruptionRequested ():
self .logger ("✅ All posts processed or end of content reached by DownloadThread.")
try:
# Correctly unpack the 7 values returned from the worker
(dl_count, skip_count, kept_originals_this_post,
retryable_failures, permanent_failures,
history_data, temp_filepath) = post_processing_worker.process()
grand_total_downloaded_files += dl_count
grand_total_skipped_files += skip_count
if kept_originals_this_post:
grand_list_of_kept_original_filenames.extend(kept_originals_this_post)
if retryable_failures:
self.retryable_file_failed_signal.emit(retryable_failures)
if history_data:
if len(self.history_candidates_buffer) < 8:
self.post_processed_for_history_signal.emit(history_data)
if permanent_failures:
self.permanent_file_failed_signal.emit(permanent_failures)
# In single-threaded text mode, pass the temp file path back to the main window
if self.single_pdf_mode and temp_filepath:
self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}")
except Exception as main_thread_err :
self .logger (f"\n❌ Critical error within DownloadThread run loop: {main_thread_err }")
traceback .print_exc ()
if not self .isInterruptionRequested ():was_process_cancelled =False
finally :
try :
if worker_signals_obj :
worker_signals_obj .progress_signal .disconnect (self .progress_signal )
worker_signals_obj .file_download_status_signal .disconnect (self .file_download_status_signal )
worker_signals_obj .external_link_signal .disconnect (self .external_link_signal )
worker_signals_obj .file_progress_signal .disconnect (self .file_progress_signal )
worker_signals_obj .missed_character_post_signal .disconnect (self .missed_character_post_signal )
worker_signals_obj .file_successfully_downloaded_signal .disconnect (self .file_successfully_downloaded_signal )
except Exception as proc_err:
post_id_for_err = individual_post_data.get('id', 'N/A')
self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}")
traceback.print_exc()
num_potential_files_est = len(individual_post_data.get('attachments', [])) + (1 if individual_post_data.get('file') else 0)
grand_total_skipped_files += num_potential_files_est
if self.skip_current_file_flag and self.skip_current_file_flag.is_set():
self.skip_current_file_flag.clear()
self.logger(" Skip current file flag was processed and cleared by DownloadThread.")
self.msleep(10)
if was_process_cancelled:
break
if not was_process_cancelled and not self.isInterruptionRequested():
self.logger("✅ All posts processed or end of content reached by DownloadThread.")
except Exception as main_thread_err:
self.logger(f"\n❌ Critical error within DownloadThread run loop: {main_thread_err}")
traceback.print_exc()
finally:
try:
# Disconnect signals
if worker_signals_obj:
worker_signals_obj.progress_signal.disconnect(self.progress_signal)
worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal)
worker_signals_obj.external_link_signal.disconnect(self.external_link_signal)
worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal)
worker_signals_obj.missed_character_post_signal.disconnect(self.missed_character_post_signal)
worker_signals_obj.file_successfully_downloaded_signal.disconnect(self.file_successfully_downloaded_signal)
except (TypeError, RuntimeError) as e:
self.logger(f" Note during DownloadThread signal disconnection: {e}")
# Emit the final signal with all collected results
self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames)
except (TypeError ,RuntimeError )as e :
self .logger (f" Note during DownloadThread signal disconnection: {e }")
self .finished_signal .emit (grand_total_downloaded_files ,grand_total_skipped_files ,self .isInterruptionRequested (),grand_list_of_kept_original_filenames )
def receive_add_character_result(self, result):
    """Record the user's response to the add-character prompt, guarded by the prompt mutex."""
    with QMutexLocker(self.prompt_mutex):
        self._add_character_response = result

View File

@@ -144,7 +144,7 @@ class EmptyPopupDialog (QDialog ):
self .setMinimumSize (int (400 *scale_factor ),int (300 *scale_factor ))
self .parent_app =parent_app_ref
self .current_scope_mode =self .SCOPE_CHARACTERS
self.current_scope_mode = self.SCOPE_CREATORS
self .app_base_dir =app_base_dir
app_icon =get_app_icon_object ()

View File

@@ -126,6 +126,21 @@ class FavoriteArtistsDialog (QDialog ):
self .artist_list_widget .setVisible (show )
def _fetch_favorite_artists (self ):
if self.cookies_config['use_cookie']:
# Check if we can load cookies for at least one of the services.
kemono_cookies = prepare_cookies_for_request(True, self.cookies_config['cookie_text'], self.cookies_config['selected_cookie_file'], self.cookies_config['app_base_dir'], self._logger, target_domain="kemono.su")
coomer_cookies = prepare_cookies_for_request(True, self.cookies_config['cookie_text'], self.cookies_config['selected_cookie_file'], self.cookies_config['app_base_dir'], self._logger, target_domain="coomer.su")
if not kemono_cookies and not coomer_cookies:
# If cookies are enabled but none could be loaded, show help and stop.
self.status_label.setText(self._tr("fav_artists_cookies_required_status", "Error: Cookies enabled but could not be loaded for any source."))
self._logger("Error: Cookies enabled but no valid cookies were loaded. Showing help dialog.")
cookie_help_dialog = CookieHelpDialog(self.parent_app, self)
cookie_help_dialog.exec_()
self.download_button.setEnabled(False)
return # Stop further execution
kemono_fav_url ="https://kemono.su/api/v1/account/favorites?type=artist"
coomer_fav_url ="https://coomer.su/api/v1/account/favorites?type=artist"

View File

@@ -0,0 +1,83 @@
from PyQt5.QtWidgets import (
QDialog, QVBoxLayout, QRadioButton, QDialogButtonBox, QButtonGroup, QLabel, QComboBox, QHBoxLayout, QCheckBox
)
from PyQt5.QtCore import Qt
class MoreOptionsDialog(QDialog):
    """
    Modal dialog for choosing the text-extraction options: which scope to
    read (post description/content vs. comments), which format to export
    (PDF/DOCX/TXT), and whether all matching posts should be merged into a
    single PDF file.
    """

    # Scope identifiers returned by get_selected_scope().
    SCOPE_CONTENT = "content"
    SCOPE_COMMENTS = "comments"

    def __init__(self, parent=None, current_scope=None, current_format=None, single_pdf_checked=False):
        """Build the dialog, restoring any previously selected options."""
        super().__init__(parent)
        self.setWindowTitle("More Options")
        self.setMinimumWidth(350)

        root_layout = QVBoxLayout(self)

        self.description_label = QLabel("Please choose the scope for the action:")
        root_layout.addWidget(self.description_label)

        # Mutually exclusive scope choice.
        self.radio_button_group = QButtonGroup(self)
        self.radio_content = QRadioButton("Description/Content")
        self.radio_comments = QRadioButton("Comments")
        self.radio_button_group.addButton(self.radio_content)
        self.radio_button_group.addButton(self.radio_comments)
        root_layout.addWidget(self.radio_content)
        root_layout.addWidget(self.radio_comments)

        # Restore the previous scope; content is the default.
        initial_radio = self.radio_comments if current_scope == self.SCOPE_COMMENTS else self.radio_content
        initial_radio.setChecked(True)

        # Export-format row: label + combo box, left-aligned.
        format_row = QHBoxLayout()
        format_caption = QLabel("Export as:")
        self.format_combo = QComboBox()
        self.format_combo.addItems(["PDF", "DOCX", "TXT"])
        requested_format = current_format.upper() if current_format else ""
        if requested_format in ("PDF", "DOCX", "TXT"):
            self.format_combo.setCurrentText(requested_format)
        else:
            self.format_combo.setCurrentText("PDF")
        format_row.addWidget(format_caption)
        format_row.addWidget(self.format_combo)
        format_row.addStretch()
        root_layout.addLayout(format_row)

        # "Single PDF" option — only meaningful when exporting as PDF.
        self.single_pdf_checkbox = QCheckBox("Single PDF")
        self.single_pdf_checkbox.setToolTip("If checked, all text from matching posts will be compiled into one single PDF file.")
        self.single_pdf_checkbox.setChecked(single_pdf_checked)
        root_layout.addWidget(self.single_pdf_checkbox)

        # Keep the checkbox enabled only while PDF is the selected format.
        self.format_combo.currentTextChanged.connect(self.update_single_pdf_checkbox_state)
        self.update_single_pdf_checkbox_state(self.format_combo.currentText())

        self.button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
        self.button_box.accepted.connect(self.accept)
        self.button_box.rejected.connect(self.reject)
        root_layout.addWidget(self.button_box)
        self.setLayout(root_layout)

    def update_single_pdf_checkbox_state(self, text):
        """Enable the Single PDF checkbox only if the format is PDF."""
        pdf_selected = (text.upper() == "PDF")
        self.single_pdf_checkbox.setEnabled(pdf_selected)
        if not pdf_selected:
            # Clear the tick so a stale choice cannot survive a format change.
            self.single_pdf_checkbox.setChecked(False)

    def get_selected_scope(self):
        """Return SCOPE_COMMENTS or SCOPE_CONTENT, matching the chosen radio button."""
        return self.SCOPE_COMMENTS if self.radio_comments.isChecked() else self.SCOPE_CONTENT

    def get_selected_format(self):
        """Return the chosen export format as a lowercase string ('pdf'/'docx'/'txt')."""
        return self.format_combo.currentText().lower()

    def get_single_pdf_state(self):
        """Return True only when the Single PDF box is both enabled and ticked."""
        return self.single_pdf_checkbox.isChecked() and self.single_pdf_checkbox.isEnabled()

View File

@@ -0,0 +1,77 @@
# SinglePDF.py
import os
try:
from fpdf import FPDF
FPDF_AVAILABLE = True
except ImportError:
FPDF_AVAILABLE = False
class PDF(FPDF):
    """FPDF subclass with no page header and a centered page-number footer."""

    def header(self):
        # Intentionally blank: each post renders its own title instead.
        pass

    def footer(self):
        # Position at 1.5 cm from bottom
        self.set_y(-15)
        # Bug fix: 'DejaVu' is only available when the caller successfully
        # registered the font file. In the documented fallback path it is not
        # registered, and set_font('DejaVu', ...) raised on every page render.
        # Page numbers are plain ASCII, so a built-in core font is a safe
        # substitute.
        try:
            self.set_font('DejaVu', '', 8)
        except Exception:
            self.set_font('Helvetica', '', 8)
        # Page number, centered across the full page width.
        self.cell(0, 10, 'Page ' + str(self.page_no()), 0, 0, 'C')
def create_single_pdf_from_content(posts_data, output_filename, font_path, logger=print):
    """
    Creates a single PDF from a list of post titles and content.

    Args:
        posts_data (list): A list of dictionaries, where each dict has 'title' and 'content' keys.
        output_filename (str): The full path for the output PDF file.
        font_path (str): Path to the DejaVuSans.ttf font file.
        logger (function, optional): A function to log progress and errors. Defaults to print.

    Returns:
        bool: True when the PDF was written successfully, False otherwise.
    """
    if not FPDF_AVAILABLE:
        logger("❌ PDF Creation failed: 'fpdf2' library is not installed. Please run: pip install fpdf2")
        return False
    if not posts_data:
        logger(" No text content was collected to create a PDF.")
        return False
    pdf = PDF()
    # Track which font family actually got registered so every set_font()
    # call below agrees with it. Bug fix: the original fallback switched to
    # Arial here but the page loop still requested 'DejaVu', which raised an
    # exception on the very first post whenever the font file was missing.
    font_family = 'DejaVu'
    try:
        if not os.path.exists(font_path):
            raise RuntimeError("Font file not found.")
        pdf.add_font('DejaVu', '', font_path, uni=True)
        # NOTE(review): the regular font file is registered as the 'B' style
        # too, so "bold" titles render with the regular face — presumably a
        # deliberate shortcut; confirm, or ship DejaVuSans-Bold.ttf.
        pdf.add_font('DejaVu', 'B', font_path, uni=True)
    except Exception as font_error:
        logger(f" ⚠️ Could not load DejaVu font: {font_error}")
        logger(" PDF may not support all characters. Falling back to default Arial font.")
        font_family = 'Arial'
    logger(f" Starting PDF creation with content from {len(posts_data)} posts...")
    for post in posts_data:
        pdf.add_page()
        # Post title: bold, larger size, left-aligned across the page width.
        pdf.set_font(font_family, 'B', 16)
        pdf.multi_cell(w=0, h=10, text=post.get('title', 'Untitled Post'), align='L')
        pdf.ln(5)  # small gap between title and body
        # Post body text.
        pdf.set_font(font_family, '', 12)
        pdf.multi_cell(w=0, h=7, text=post.get('content', 'No Content'))
    try:
        pdf.output(output_filename)
        logger(f"✅ Successfully created single PDF: '{os.path.basename(output_filename)}'")
        return True
    except Exception as e:
        logger(f"❌ A critical error occurred while saving the final PDF: {e}")
        return False

File diff suppressed because it is too large Load Diff