mirror of https://github.com/Yuvi9587/Kemono-Downloader.git
synced 2025-12-17 15:36:51 +00:00

commit dbdf82a079 (parent f0bf74da16)

97  data/dejavu-sans/DejaVu Fonts License.txt  Normal file
@@ -0,0 +1,97 @@
Fonts are (c) Bitstream (see below). DejaVu changes are in public domain.
Glyphs imported from Arev fonts are (c) Tavmjong Bah (see below)

Bitstream Vera Fonts Copyright
------------------------------

Copyright (c) 2003 by Bitstream, Inc. All Rights Reserved. Bitstream Vera is
a trademark of Bitstream, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of the fonts accompanying this license ("Fonts") and associated
documentation files (the "Font Software"), to reproduce and distribute the
Font Software, including without limitation the rights to use, copy, merge,
publish, distribute, and/or sell copies of the Font Software, and to permit
persons to whom the Font Software is furnished to do so, subject to the
following conditions:

The above copyright and trademark notices and this permission notice shall
be included in all copies of one or more of the Font Software typefaces.

The Font Software may be modified, altered, or added to, and in particular
the designs of glyphs or characters in the Fonts may be modified and
additional glyphs or characters may be added to the Fonts, only if the fonts
are renamed to names not containing either the words "Bitstream" or the word
"Vera".

This License becomes null and void to the extent applicable to Fonts or Font
Software that has been modified and is distributed under the "Bitstream
Vera" names.

The Font Software may be sold as part of a larger software package but no
copy of one or more of the Font Software typefaces may be sold by itself.

THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF COPYRIGHT, PATENT,
TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL BITSTREAM OR THE GNOME
FOUNDATION BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, INCLUDING
ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM OTHER DEALINGS IN THE
FONT SOFTWARE.

Except as contained in this notice, the names of Gnome, the Gnome
Foundation, and Bitstream Inc., shall not be used in advertising or
otherwise to promote the sale, use or other dealings in this Font Software
without prior written authorization from the Gnome Foundation or Bitstream
Inc., respectively. For further information, contact: fonts at gnome dot
org.

Arev Fonts Copyright
------------------------------

Copyright (c) 2006 by Tavmjong Bah. All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of the fonts accompanying this license ("Fonts") and
associated documentation files (the "Font Software"), to reproduce
and distribute the modifications to the Bitstream Vera Font Software,
including without limitation the rights to use, copy, merge, publish,
distribute, and/or sell copies of the Font Software, and to permit
persons to whom the Font Software is furnished to do so, subject to
the following conditions:

The above copyright and trademark notices and this permission notice
shall be included in all copies of one or more of the Font Software
typefaces.

The Font Software may be modified, altered, or added to, and in
particular the designs of glyphs or characters in the Fonts may be
modified and additional glyphs or characters may be added to the
Fonts, only if the fonts are renamed to names not containing either
the words "Tavmjong Bah" or the word "Arev".

This License becomes null and void to the extent applicable to Fonts
or Font Software that has been modified and is distributed under the
"Tavmjong Bah Arev" names.

The Font Software may be sold as part of a larger software package but
no copy of one or more of the Font Software typefaces may be sold by
itself.

THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL
TAVMJONG BAH BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.

Except as contained in this notice, the name of Tavmjong Bah shall not
be used in advertising or otherwise to promote the sale, use or other
dealings in this Font Software without prior written authorization
from Tavmjong Bah. For further information, contact: tavmjong @ free
. fr.
BIN  data/dejavu-sans/DejaVuSans-Bold.ttf  Normal file (binary file not shown)
BIN  data/dejavu-sans/DejaVuSans-BoldOblique.ttf  Normal file (binary file not shown)
BIN  data/dejavu-sans/DejaVuSans-ExtraLight.ttf  Normal file (binary file not shown)
BIN  data/dejavu-sans/DejaVuSans-Oblique.ttf  Normal file (binary file not shown)
BIN  data/dejavu-sans/DejaVuSans.ttf  Normal file (binary file not shown)
BIN  data/dejavu-sans/DejaVuSansCondensed-Bold.ttf  Normal file (binary file not shown)
BIN  data/dejavu-sans/DejaVuSansCondensed-BoldOblique.ttf  Normal file (binary file not shown)
BIN  data/dejavu-sans/DejaVuSansCondensed-Oblique.ttf  Normal file (binary file not shown)
BIN  data/dejavu-sans/DejaVuSansCondensed.ttf  Normal file (binary file not shown)
5529  main_window_old.py  Normal file
File diff suppressed because it is too large.
@@ -1,12 +1,10 @@
# --- Standard Library Imports ---
import time
import traceback
from urllib.parse import urlparse

# --- Third-Party Library Imports ---
import json  # Ensure json is imported
import requests

# --- Local Application Imports ---
# (Keep the rest of your imports)
from ..utils.network_utils import extract_post_info, prepare_cookies_for_request
from ..config.constants import (
    STYLE_DATE_POST_TITLE
@@ -15,36 +13,24 @@ from ..config.constants import (

def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
    """
    Fetches a single page of posts from the API with retry logic.

    Args:
        api_url_base (str): The base URL for the user's posts.
        headers (dict): The request headers.
        offset (int): The offset for pagination.
        logger (callable): Function to log messages.
        cancellation_event (threading.Event): Event to signal cancellation.
        pause_event (threading.Event): Event to signal pause.
        cookies_dict (dict): A dictionary of cookies to include in the request.

    Returns:
        list: A list of post data dictionaries from the API.

    Raises:
        RuntimeError: If the fetch fails after all retries or encounters a non-retryable error.
    Fetches a single page of posts from the API with robust retry logic.
    NEW: Requests only essential fields to keep the response size small and reliable.
    """
    if cancellation_event and cancellation_event.is_set():
        logger(" Fetch cancelled before request.")
        raise RuntimeError("Fetch operation cancelled by user.")
    if pause_event and pause_event.is_set():
        logger(" Post fetching paused...")
        while pause_event.is_set():
            if cancellation_event and cancellation_event.is_set():
                logger(" Post fetching cancelled while paused.")
                raise RuntimeError("Fetch operation cancelled by user.")
                raise RuntimeError("Fetch operation cancelled by user while paused.")
            time.sleep(0.5)
        logger(" Post fetching resumed.")

    paginated_url = f'{api_url_base}?o={offset}'

    # --- MODIFICATION: Added `fields` to the URL to request only metadata ---
    # This prevents the large 'content' field from being included in the list, avoiding timeouts.
    fields_to_request = "id,user,service,title,shared_file,added,published,edited,file,attachments,tags"
    paginated_url = f'{api_url_base}?o={offset}&fields={fields_to_request}'

    max_retries = 3
    retry_delay = 5
@@ -52,22 +38,18 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
        if cancellation_event and cancellation_event.is_set():
            raise RuntimeError("Fetch operation cancelled by user during retry loop.")

        log_message = f" Fetching: {paginated_url} (Page approx. {offset // 50 + 1})"
        log_message = f" Fetching post list: {api_url_base}?o={offset} (Page approx. {offset // 50 + 1})"
        if attempt > 0:
            log_message += f" (Attempt {attempt + 1}/{max_retries})"
        logger(log_message)

        try:
            response = requests.get(paginated_url, headers=headers, timeout=(15, 90), cookies=cookies_dict)
            # We can now remove the streaming logic as the response will be small and fast.
            response = requests.get(paginated_url, headers=headers, timeout=(15, 60), cookies=cookies_dict)
            response.raise_for_status()

            if 'application/json' not in response.headers.get('Content-Type', '').lower():
                logger(f"⚠️ Unexpected content type from API: {response.headers.get('Content-Type')}. Body: {response.text[:200]}")
                return []

            return response.json()

        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
        except requests.exceptions.RequestException as e:
            logger(f" ⚠️ Retryable network error on page fetch (Attempt {attempt + 1}): {e}")
            if attempt < max_retries - 1:
                delay = retry_delay * (2 ** attempt)
@@ -76,18 +58,46 @@ def fetch_posts_paginated(api_url_base, headers, offset, logger, cancellation_ev
                continue
            else:
                logger(f" ❌ Failed to fetch page after {max_retries} attempts.")
                raise RuntimeError(f"Timeout or connection error fetching offset {offset}")
        except requests.exceptions.RequestException as e:
            err_msg = f"Error fetching offset {offset}: {e}"
            if e.response is not None:
                err_msg += f" (Status: {e.response.status_code}, Body: {e.response.text[:200]})"
            raise RuntimeError(err_msg)
        except ValueError as e:  # JSON decode error
            raise RuntimeError(f"Error decoding JSON from offset {offset}: {e}. Response: {response.text[:200]}")
                raise RuntimeError(f"Network error fetching offset {offset}")
        except json.JSONDecodeError as e:
            logger(f" ❌ Failed to decode JSON on page fetch (Attempt {attempt + 1}): {e}")
            if attempt < max_retries - 1:
                delay = retry_delay * (2 ** attempt)
                logger(f" Retrying in {delay} seconds...")
                time.sleep(delay)
                continue
            else:
                raise RuntimeError(f"JSONDecodeError fetching offset {offset}")

    raise RuntimeError(f"Failed to fetch page {paginated_url} after all attempts.")


def fetch_single_post_data(api_domain, service, user_id, post_id, headers, logger, cookies_dict=None):
    """
    --- NEW FUNCTION ---
    Fetches the full data, including the 'content' field, for a single post.
    """
    post_api_url = f"https://{api_domain}/api/v1/{service}/user/{user_id}/post/{post_id}"
    logger(f" Fetching full content for post ID {post_id}...")
    try:
        # Use streaming here as a precaution for single posts that are still very large.
        with requests.get(post_api_url, headers=headers, timeout=(15, 300), cookies=cookies_dict, stream=True) as response:
            response.raise_for_status()
            response_body = b""
            for chunk in response.iter_content(chunk_size=8192):
                response_body += chunk

        full_post_data = json.loads(response_body)
        # The API sometimes wraps the post in a list, handle that.
        if isinstance(full_post_data, list) and full_post_data:
            return full_post_data[0]
        return full_post_data

    except Exception as e:
        logger(f" ❌ Failed to fetch full content for post {post_id}: {e}")
        return None
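A hedged usage sketch (the service, IDs, and headers are placeholders, not values from this commit):

    # Sketch: fall back to the full single-post fetch when the trimmed list
    # response omitted the 'content' field.
    post = fetch_single_post_data(
        api_domain="kemono.su",   # hypothetical
        service="patreon",        # hypothetical
        user_id="12345",
        post_id="67890",
        headers={"User-Agent": "Mozilla/5.0"},
        logger=print,
    )
    content_html = post.get("content", "") if post else ""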


def fetch_post_comments(api_domain, service, user_id, post_id, headers, logger, cancellation_event=None, pause_event=None, cookies_dict=None):
    """Fetches all comments for a specific post."""
    if cancellation_event and cancellation_event.is_set():

@@ -20,6 +20,26 @@ try:
    from PIL import Image
except ImportError:
    Image = None
#
try:
    from fpdf import FPDF
    # Add a simple class to handle the header/footer for stories
    class PDF(FPDF):
        def header(self):
            pass  # No header
        def footer(self):
            self.set_y(-15)
            self.set_font('Arial', 'I', 8)
            self.cell(0, 10, 'Page %s' % self.page_no(), 0, 0, 'C')

except ImportError:
    FPDF = None
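A minimal sketch of what the footer override yields, assuming the fpdf2 package is installed (the text and output filename are illustrative):

    # Sketch: pages rendered through the PDF subclass get a centered
    # italic "Page N" footer; the header stays empty.
    if FPDF:
        pdf = PDF()
        pdf.add_page()
        pdf.set_font('Arial', '', 12)
        pdf.multi_cell(0, 5, "Example story text.")
        pdf.output('example.pdf')  # illustrative filename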

try:
    from docx import Document
except ImportError:
    Document = None

# --- PyQt5 Imports ---
from PyQt5 .QtCore import Qt ,QThread ,pyqtSignal ,QMutex ,QMutexLocker ,QObject ,QTimer ,QSettings ,QStandardPaths ,QCoreApplication ,QUrl ,QSize ,QProcess
# --- Local Application Imports ---
@@ -48,7 +68,8 @@ class PostProcessorSignals (QObject ):
    file_progress_signal =pyqtSignal (str ,object )
    file_successfully_downloaded_signal =pyqtSignal (dict )
    missed_character_post_signal =pyqtSignal (str ,str )

    worker_finished_signal = pyqtSignal(tuple)
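For orientation, a hedged sketch of wiring the new signal; the slot below is made up, and the real receiver lives elsewhere in the app:

    # Sketch: worker_finished_signal carries the worker's entire result tuple.
    def on_worker_finished(result_tuple):  # hypothetical slot
        downloaded, skipped = result_tuple[0], result_tuple[1]
        print(f"worker done: {downloaded} downloaded, {skipped} skipped")

    signals = PostProcessorSignals()
    signals.worker_finished_signal.connect(on_worker_finished)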

class PostProcessorWorker:
    def __init__ (self ,post_data ,download_root ,known_names ,
                  filter_character_list ,emitter ,
@@ -81,6 +102,10 @@ class PostProcessorWorker:
                  keep_in_post_duplicates=False,
                  session_file_path=None,
                  session_lock=None,
                  text_only_scope=None,
                  text_export_format='txt',
                  single_pdf_mode=False,
                  project_root_dir=None,
                  ):
        self .post =post_data
        self .download_root =download_root
@@ -134,6 +159,10 @@ class PostProcessorWorker:
        self.keep_in_post_duplicates = keep_in_post_duplicates
        self.session_file_path = session_file_path
        self.session_lock = session_lock
        self.text_only_scope = text_only_scope
        self.text_export_format = text_export_format
        self.single_pdf_mode = single_pdf_mode  # <-- ADD THIS LINE
        self.project_root_dir = project_root_dir
        if self .compress_images and Image is None :

            self .logger ("⚠️ Image compression disabled: Pillow library not found.")
@@ -557,6 +586,8 @@ class PostProcessorWorker:
        final_total_for_progress =total_size_bytes if download_successful_flag and total_size_bytes >0 else downloaded_size_bytes
        self ._emit_signal ('file_progress',api_original_filename ,(downloaded_size_bytes ,final_total_for_progress ))

        # --- Start of Replacement Block ---

        # Rescue download if an IncompleteRead error occurred but the file is complete
        if (not download_successful_flag and
                isinstance(last_exception_for_retry_later, http.client.IncompleteRead) and
@@ -614,33 +645,32 @@ class PostProcessorWorker:
        is_img_for_compress_check = is_image(api_original_filename)

        if is_img_for_compress_check and self.compress_images and Image and downloaded_size_bytes > (1.5 * 1024 * 1024):
            # ... (This block for image compression remains the same)
            self .logger (f" Compressing '{api_original_filename }' ({downloaded_size_bytes /(1024 *1024 ):.2f} MB)...")
            if self ._check_pause (f"Image compression for '{api_original_filename }'"):return 0 ,1 ,filename_to_save_in_main_path ,was_original_name_kept_flag ,FILE_DOWNLOAD_STATUS_SKIPPED ,None
            img_content_for_pillow =None
            try :
                with open (downloaded_part_file_path ,'rb')as f_img_in :
                    img_content_for_pillow =BytesIO (f_img_in .read ())
                with Image .open (img_content_for_pillow )as img_obj :
                    if img_obj .mode =='P':img_obj =img_obj .convert ('RGBA')
                    elif img_obj .mode not in ['RGB','RGBA','L']:img_obj =img_obj .convert ('RGB')
                    compressed_output_io =BytesIO ()
                    img_obj .save (compressed_output_io ,format ='WebP',quality =80 ,method =4 )
                    compressed_size =compressed_output_io .getbuffer ().nbytes
                    if compressed_size <downloaded_size_bytes *0.9 :
                        self .logger (f" Compression success: {compressed_size /(1024 *1024 ):.2f} MB.")
                        data_to_write_io =compressed_output_io
                        data_to_write_io .seek (0 )
                        base_name_orig ,_ =os .path .splitext (filename_after_compression )
                        filename_after_compression =base_name_orig +'.webp'
                        self .logger (f" Updated filename (compressed): {filename_after_compression }")
                    else :
                        self .logger (f" Compression skipped: WebP not significantly smaller.")
                        if compressed_output_io :compressed_output_io .close ()
            except Exception as comp_e :
                self .logger (f"❌ Compression failed for '{api_original_filename }': {comp_e }. Saving original.")
            finally :
                if img_content_for_pillow :img_content_for_pillow .close ()
            self.logger(f" Compressing '{api_original_filename}' ({downloaded_size_bytes / (1024 * 1024):.2f} MB)...")
            if self._check_pause(f"Image compression for '{api_original_filename}'"): return 0, 1, filename_to_save_in_main_path, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
            img_content_for_pillow = None
            try:
                with open(downloaded_part_file_path, 'rb') as f_img_in:
                    img_content_for_pillow = BytesIO(f_img_in.read())
                with Image.open(img_content_for_pillow) as img_obj:
                    if img_obj.mode == 'P': img_obj = img_obj.convert('RGBA')
                    elif img_obj.mode not in ['RGB', 'RGBA', 'L']: img_obj = img_obj.convert('RGB')
                    compressed_output_io = BytesIO()
                    img_obj.save(compressed_output_io, format='WebP', quality=80, method=4)
                    compressed_size = compressed_output_io.getbuffer().nbytes
                    if compressed_size < downloaded_size_bytes * 0.9:
                        self.logger(f" Compression success: {compressed_size / (1024 * 1024):.2f} MB.")
                        data_to_write_io = compressed_output_io
                        data_to_write_io.seek(0)
                        base_name_orig, _ = os.path.splitext(filename_after_compression)
                        filename_after_compression = base_name_orig + '.webp'
                        self.logger(f" Updated filename (compressed): {filename_after_compression}")
                    else:
                        self.logger(f" Compression skipped: WebP not significantly smaller.")
                        if compressed_output_io: compressed_output_io.close()
            except Exception as comp_e:
                self.logger(f"❌ Compression failed for '{api_original_filename}': {comp_e}. Saving original.")
            finally:
                if img_content_for_pillow: img_content_for_pillow.close()
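A small worked example of the keep-or-discard rule in the compression block (numbers invented):

    # Sketch: the WebP re-encode is kept only if it lands under 90% of the
    # original download size.
    original_bytes = int(3.0 * 1024 * 1024)        # 3.00 MB download
    webp_bytes = int(2.5 * 1024 * 1024)            # 2.50 MB after re-encode
    keep_webp = webp_bytes < original_bytes * 0.9  # 2.50 MB < 2.70 MB -> True
    print(keep_webp)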

        final_filename_on_disk = filename_after_compression
        temp_base, temp_ext = os.path.splitext(final_filename_on_disk)
@@ -695,11 +725,14 @@ class PostProcessorWorker:

            return 1, 0, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SUCCESS, None
        except Exception as save_err:
            self.logger(f"->>Save Fail for '{final_filename_on_disk}': {save_err}")
            if os.path.exists(final_save_path):
                try: os.remove(final_save_path)
                except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}")
            return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_SKIPPED, None
            self.logger(f"->>Save Fail for '{final_filename_on_disk}': {save_err}")
            if os.path.exists(final_save_path):
                try: os.remove(final_save_path)
                except OSError: self.logger(f" -> Failed to remove partially saved file: {final_save_path}")

            # --- FIX: Report as a permanent failure so it appears in the error dialog ---
            permanent_failure_details = { 'file_info': file_info, 'target_folder_path': target_folder_path, 'headers': headers, 'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title, 'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post, 'forced_filename_override': filename_to_save_in_main_path, }
            return 0, 1, final_filename_saved_for_return, was_original_name_kept_flag, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, permanent_failure_details
        finally:
            if data_to_write_io and hasattr(data_to_write_io, 'close'):
                data_to_write_io.close()
@@ -738,14 +771,16 @@ class PostProcessorWorker:
        effective_save_folder =target_folder_path
        filename_after_styling_and_word_removal =filename_to_save_in_main_path

        try :
            os .makedirs (effective_save_folder ,exist_ok =True )
        except OSError as e :
            self .logger (f" ❌ Critical error creating directory '{effective_save_folder }': {e }. Skipping file '{api_original_filename }'.")
            if downloaded_part_file_path and os .path .exists (downloaded_part_file_path ):
                try :os .remove (downloaded_part_file_path )
                except OSError :pass
            return 0 ,1 ,api_original_filename ,False ,FILE_DOWNLOAD_STATUS_SKIPPED ,None
        try:
            os.makedirs(effective_save_folder, exist_ok=True)
        except OSError as e:
            self.logger(f" ❌ Critical error creating directory '{effective_save_folder}': {e}. Skipping file '{api_original_filename}'.")
            if downloaded_part_file_path and os.path.exists(downloaded_part_file_path):
                try: os.remove(downloaded_part_file_path)
                except OSError: pass
            # --- FIX: Report as a permanent failure so it appears in the error dialog ---
            permanent_failure_details = { 'file_info': file_info, 'target_folder_path': target_folder_path, 'headers': headers, 'original_post_id_for_log': original_post_id_for_log, 'post_title': post_title, 'file_index_in_post': file_index_in_post, 'num_files_in_this_post': num_files_in_this_post, 'forced_filename_override': filename_to_save_in_main_path, }
            return 0, 1, api_original_filename, False, FILE_DOWNLOAD_STATUS_FAILED_PERMANENTLY_THIS_SESSION, permanent_failure_details

        data_to_write_io =None
        filename_after_compression =filename_after_styling_and_word_removal
@@ -849,8 +884,8 @@ class PostProcessorWorker:
        data_to_write_io .close ()

    def process (self ):
        if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None
        if self .check_cancel ():return 0 ,0 ,[],[],[],None
        if self ._check_pause (f"Post processing for ID {self .post .get ('id','N/A')}"):return 0 ,0 ,[],[],[],None, None
        if self .check_cancel ():return 0 ,0 ,[],[],[],None, None
        current_character_filters =self ._get_current_character_filters ()
        kept_original_filenames_for_log =[]
        retryable_failures_this_post =[]
@@ -986,23 +1021,23 @@ class PostProcessorWorker:
        if self .char_filter_scope ==CHAR_SCOPE_TITLE and not post_is_candidate_by_title_char_match :
            self .logger (f" -> Skip Post (Scope: Title - No Char Match): Title '{post_title [:50 ]}' does not match character filters.")
            self ._emit_signal ('missed_character_post',post_title ,"No title match for character filter")
            return 0 ,num_potential_files_in_post ,[],[],[],None
            return 0 ,num_potential_files_in_post ,[],[],[],None, None
        if self .char_filter_scope ==CHAR_SCOPE_COMMENTS and not post_is_candidate_by_file_char_match_in_comment_scope and not post_is_candidate_by_comment_char_match :
            self .logger (f" -> Skip Post (Scope: Comments - No Char Match in Comments): Post ID '{post_id }', Title '{post_title [:50 ]}...'")
            if self .emitter and hasattr (self .emitter ,'missed_character_post_signal'):
                self ._emit_signal ('missed_character_post',post_title ,"No character match in files or comments (Comments scope)")
            return 0 ,num_potential_files_in_post ,[],[],[],None
            return 0 ,num_potential_files_in_post ,[],[],[],None, None
        if self .skip_words_list and (self .skip_words_scope ==SKIP_SCOPE_POSTS or self .skip_words_scope ==SKIP_SCOPE_BOTH ):
            if self ._check_pause (f"Skip words (post title) for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
            post_title_lower =post_title .lower ()
            for skip_word in self .skip_words_list :
                if skip_word .lower ()in post_title_lower :
                    self .logger (f" -> Skip Post (Keyword in Title '{skip_word }'): '{post_title [:50 ]}...'. Scope: {self .skip_words_scope }")
                    return 0 ,num_potential_files_in_post ,[],[],[],None
                    return 0 ,num_potential_files_in_post ,[],[],[],None, None
        if not self .extract_links_only and self .manga_mode_active and current_character_filters and (self .char_filter_scope ==CHAR_SCOPE_TITLE or self .char_filter_scope ==CHAR_SCOPE_BOTH )and not post_is_candidate_by_title_char_match :
            self .logger (f" -> Skip Post (Manga Mode with Title/Both Scope - No Title Char Match): Title '{post_title [:50 ]}' doesn't match filters.")
            self ._emit_signal ('missed_character_post',post_title ,"Manga Mode: No title match for character filter (Title/Both scope)")
            return 0 ,num_potential_files_in_post ,[],[],[],None
            return 0 ,num_potential_files_in_post ,[],[],[],None, None
        if not isinstance (post_attachments ,list ):
            self .logger (f"⚠️ Corrupt attachment data for post {post_id } (expected list, got {type (post_attachments )}). Skipping attachments.")
            post_attachments =[]
@@ -1171,6 +1206,156 @@ class PostProcessorWorker:
                break

        determined_post_save_path_for_history =os .path .join (base_path_for_post_subfolder ,final_post_subfolder_name )
        if self.filter_mode == 'text_only' and not self.extract_links_only:
            self.logger(f" Mode: Text Only (Scope: {self.text_only_scope})")

            # --- Apply Title-based filters to ensure post is a candidate ---
            post_title_lower = post_title.lower()
            if self.skip_words_list and (self.skip_words_scope == SKIP_SCOPE_POSTS or self.skip_words_scope == SKIP_SCOPE_BOTH):
                for skip_word in self.skip_words_list:
                    if skip_word.lower() in post_title_lower:
                        self.logger(f" -> Skip Post (Keyword in Title '{skip_word}'): '{post_title[:50]}...'.")
                        return 0, num_potential_files_in_post, [], [], [], None, None

            if current_character_filters and not post_is_candidate_by_title_char_match and not post_is_candidate_by_comment_char_match and not post_is_candidate_by_file_char_match_in_comment_scope:
                self.logger(f" -> Skip Post (No character match for text extraction): '{post_title[:50]}...'.")
                return 0, num_potential_files_in_post, [], [], [], None, None

            # --- Get the text content based on scope ---
            raw_text_content = ""
            final_post_data = post_data

            # Fetch full post data if content is missing and scope is 'content'
            if self.text_only_scope == 'content' and 'content' not in final_post_data:
                self.logger(f" Post {post_id} is missing 'content' field, fetching full data...")
                parsed_url = urlparse(self.api_url_input)
                api_domain = parsed_url.netloc
                cookies = prepare_cookies_for_request(self.use_cookie, self.cookie_text, self.selected_cookie_file, self.app_base_dir, self.logger, target_domain=api_domain)

                from .api_client import fetch_single_post_data  # Local import to avoid circular dependency issues
                full_data = fetch_single_post_data(api_domain, self.service, self.user_id, post_id, headers, self.logger, cookies_dict=cookies)
                if full_data:
                    final_post_data = full_data

            if self.text_only_scope == 'content':
                raw_text_content = final_post_data.get('content', '')
            elif self.text_only_scope == 'comments':
                try:
                    parsed_url = urlparse(self.api_url_input)
                    api_domain = parsed_url.netloc
                    comments_data = fetch_post_comments(api_domain, self.service, self.user_id, post_id, headers, self.logger, self.cancellation_event, self.pause_event)
                    if comments_data:
                        comment_texts = []
                        for comment in comments_data:
                            user = comment.get('user', {}).get('name', 'Unknown User')
                            timestamp = comment.get('updated', 'No Date')
                            body = strip_html_tags(comment.get('content', ''))
                            comment_texts.append(f"--- Comment by {user} on {timestamp} ---\n{body}\n")
                        raw_text_content = "\n".join(comment_texts)
                except Exception as e:
                    self.logger(f" ❌ Error fetching comments for text-only mode: {e}")

            if not raw_text_content or not raw_text_content.strip():
                self.logger(" -> Skip Saving Text: No content/comments found or fetched.")
                return 0, num_potential_files_in_post, [], [], [], None, None

            # --- Robust HTML-to-TEXT Conversion ---
            paragraph_pattern = re.compile(r'<p.*?>(.*?)</p>', re.IGNORECASE | re.DOTALL)
            html_paragraphs = paragraph_pattern.findall(raw_text_content)
            cleaned_text = ""
            if not html_paragraphs:
                self.logger(" ⚠️ No <p> tags found. Falling back to basic HTML cleaning for the whole block.")
                text_with_br = re.sub(r'<br\s*/?>', '\n', raw_text_content, flags=re.IGNORECASE)
                cleaned_text = re.sub(r'<.*?>', '', text_with_br)
            else:
                cleaned_paragraphs_list = []
                for p_content in html_paragraphs:
                    p_with_br = re.sub(r'<br\s*/?>', '\n', p_content, flags=re.IGNORECASE)
                    p_cleaned = re.sub(r'<.*?>', '', p_with_br)
                    p_final = html.unescape(p_cleaned).strip()
                    if p_final:
                        cleaned_paragraphs_list.append(p_final)
                cleaned_text = '\n\n'.join(cleaned_paragraphs_list)
            cleaned_text = cleaned_text.replace('…', '...')
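A short worked example of the paragraph-based cleaning above (the input HTML is made up):

    # Sketch: what the <p>-driven cleaner produces for a typical content blob.
    import html, re

    raw = "<p>Chapter 1<br/>The start.</p><p><em>Later&hellip;</em></p>"
    paras = re.compile(r'<p.*?>(.*?)</p>', re.IGNORECASE | re.DOTALL).findall(raw)
    cleaned = []
    for p in paras:
        p = re.sub(r'<br\s*/?>', '\n', p, flags=re.IGNORECASE)  # <br> -> newline
        p = re.sub(r'<.*?>', '', p)                             # strip remaining tags
        p = html.unescape(p).strip()                            # decode entities
        if p:
            cleaned.append(p)
    text = '\n\n'.join(cleaned).replace('…', '...')
    print(text)  # "Chapter 1\nThe start." then a blank line, then "Later..."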

            # --- Logic for Single PDF Mode (File-based) ---
            if self.single_pdf_mode:
                if not cleaned_text:
                    return 0, 0, [], [], [], None, None

                content_data = {
                    'title': post_title,
                    'content': cleaned_text,
                    'published': self.post.get('published') or self.post.get('added')
                }
                temp_dir = os.path.join(self.app_base_dir, "appdata")
                os.makedirs(temp_dir, exist_ok=True)
                temp_filename = f"tmp_{post_id}_{uuid.uuid4().hex[:8]}.json"
                temp_filepath = os.path.join(temp_dir, temp_filename)

                try:
                    with open(temp_filepath, 'w', encoding='utf-8') as f:
                        json.dump(content_data, f, indent=2)
                    self.logger(f" Saved temporary text for '{post_title}' for single PDF compilation.")
                    return 0, 0, [], [], [], None, temp_filepath
                except Exception as e:
                    self.logger(f" ❌ Failed to write temporary file for single PDF: {e}")
                    return 0, 0, [], [], [], None, None
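For reference, a sketch of what one of these temp files holds (values invented; the keys mirror content_data above):

    # Sketch: per-post payload later consumed by the single-PDF compiler.
    example_payload = {
        'title': 'Chapter 12 - The Return',
        'content': 'Cleaned story text...\n\nSecond paragraph.',
        'published': '2024-05-01T12:00:00',
    }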

            # --- Logic for Individual File Saving ---
            else:
                file_extension = self.text_export_format
                txt_filename = clean_filename(post_title) + f".{file_extension}"
                final_save_path = os.path.join(determined_post_save_path_for_history, txt_filename)

                try:
                    os.makedirs(determined_post_save_path_for_history, exist_ok=True)
                    base, ext = os.path.splitext(final_save_path)
                    counter = 1
                    while os.path.exists(final_save_path):
                        final_save_path = f"{base}_{counter}{ext}"
                        counter += 1

                    if file_extension == 'pdf':
                        if FPDF:
                            self.logger(f" Converting to PDF...")
                            pdf = PDF()
                            font_path = os.path.join(self.app_base_dir, 'data', 'dejavu-sans', 'DejaVuSans.ttf')
                            try:
                                if not os.path.exists(font_path): raise RuntimeError(f"Font file not found: {font_path}")
                                pdf.add_font('DejaVu', '', font_path, uni=True)
                                pdf.set_font('DejaVu', '', 12)
                            except Exception as font_error:
                                self.logger(f" ⚠️ Could not load DejaVu font: {font_error}. Falling back to Arial.")
                                pdf.set_font('Arial', '', 12)
                            pdf.add_page()
                            pdf.multi_cell(0, 5, cleaned_text)
                            pdf.output(final_save_path)
                        else:
                            self.logger(f" ⚠️ Cannot create PDF: 'fpdf2' library not installed. Saving as .txt.")
                            final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
                            with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)

                    elif file_extension == 'docx':
                        if Document:
                            self.logger(f" Converting to DOCX...")
                            document = Document()
                            document.add_paragraph(cleaned_text)
                            document.save(final_save_path)
                        else:
                            self.logger(f" ⚠️ Cannot create DOCX: 'python-docx' library not installed. Saving as .txt.")
                            final_save_path = os.path.splitext(final_save_path)[0] + ".txt"
                            with open(final_save_path, 'w', encoding='utf-8') as f: f.write(cleaned_text)

                    else:  # Default to TXT
                        with open(final_save_path, 'w', encoding='utf-8') as f:
                            f.write(cleaned_text)

                    self.logger(f"✅ Saved Text: '{os.path.basename(final_save_path)}' in '{os.path.basename(determined_post_save_path_for_history)}'")
                    return 1, num_potential_files_in_post, [], [], [], history_data_for_this_post, None
                except Exception as e:
                    self.logger(f" ❌ Critical error saving text file '{txt_filename}': {e}")
                    return 0, num_potential_files_in_post, [], [], [], None, None

        if not self .extract_links_only and self .use_subfolders and self .skip_words_list :
            if self ._check_pause (f"Folder keyword skip check for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
@@ -1179,7 +1364,7 @@ class PostProcessorWorker:
            if any (skip_word .lower ()in folder_name_to_check .lower ()for skip_word in self .skip_words_list ):
                matched_skip =next ((sw for sw in self .skip_words_list if sw .lower ()in folder_name_to_check .lower ()),"unknown_skip_word")
                self .logger (f" -> Skip Post (Folder Keyword): Potential folder '{folder_name_to_check }' contains '{matched_skip }'.")
                return 0 ,num_potential_files_in_post ,[],[],[],None
                return 0 ,num_potential_files_in_post ,[],[],[],None, None
        if (self .show_external_links or self .extract_links_only )and post_content_html :
            if self ._check_pause (f"External link extraction for post {post_id }"):return 0 ,num_potential_files_in_post ,[],[],[],None
            try :
@@ -1555,7 +1740,17 @@ class PostProcessorWorker:
            except OSError as e_rmdir :
                self .logger (f" ⚠️ Could not remove empty post-specific subfolder '{path_to_check_for_emptiness }': {e_rmdir }")

        return total_downloaded_this_post ,total_skipped_this_post ,kept_original_filenames_for_log ,retryable_failures_this_post ,permanent_failures_this_post ,history_data_for_this_post
        result_tuple = (total_downloaded_this_post, total_skipped_this_post,
                        kept_original_filenames_for_log, retryable_failures_this_post,
                        permanent_failures_this_post, history_data_for_this_post,
                        None)  # The 7th item is None because we already saved the temp file

        # In Single PDF mode, the 7th item is the temp file path we created.
        if self.single_pdf_mode and os.path.exists(temp_filepath):
            result_tuple = (0, 0, [], [], [], None, temp_filepath)

        self._emit_signal('worker_finished', result_tuple)
        return  # The method now returns nothing.

class DownloadThread (QThread ):
    progress_signal =pyqtSignal (str )
@@ -1605,6 +1800,10 @@ class DownloadThread (QThread ):
                  cookie_text ="",
                  session_file_path=None,
                  session_lock=None,
                  text_only_scope=None,
                  text_export_format='txt',
                  single_pdf_mode=False,
                  project_root_dir=None,
                  ):
        super ().__init__ ()
        self .api_url_input =api_url_input
@@ -1660,6 +1859,11 @@ class DownloadThread (QThread ):
        self.session_file_path = session_file_path
        self.session_lock = session_lock
        self.history_candidates_buffer =deque (maxlen =8 )
        self.text_only_scope = text_only_scope
        self.text_export_format = text_export_format
        self.single_pdf_mode = single_pdf_mode  # <-- ADD THIS LINE
        self.project_root_dir = project_root_dir  # Add this assignment

        if self .compress_images and Image is None :
            self .logger ("⚠️ Image compression disabled: Pillow library not found (DownloadThread).")
            self .compress_images =False
@@ -1682,162 +1886,172 @@ class DownloadThread (QThread ):
            self .logger ("⏭️ Skip requested for current file (single-thread mode).")
            self .skip_current_file_flag .set ()
        else :self .logger ("ℹ️ Skip file: No download active or skip flag not available for current context.")

    def run (self ):
        """
        The main execution method for the single-threaded download process.
        This version is corrected to handle 7 return values from the worker and
        to pass the 'single_pdf_mode' setting correctly.
        """
        grand_total_downloaded_files =0
        grand_total_skipped_files =0
        grand_list_of_kept_original_filenames =[]
        was_process_cancelled =False

        # This block for initializing manga mode counters remains unchanged
        if self .manga_mode_active and self .manga_filename_style ==STYLE_DATE_BASED and not self .extract_links_only and self .manga_date_file_counter_ref is None :
            series_scan_dir =self .output_dir
            if self .use_subfolders :
                if self .filter_character_list_objects_initial and self .filter_character_list_objects_initial [0 ]and self .filter_character_list_objects_initial [0 ].get ("name"):
                    series_folder_name =clean_folder_name (self .filter_character_list_objects_initial [0 ]["name"])
                    series_scan_dir =os .path .join (series_scan_dir ,series_folder_name )
                elif self .service and self .user_id :
                    creator_based_folder_name =clean_folder_name (str (self .user_id ))
                    series_scan_dir =os .path .join (series_scan_dir ,creator_based_folder_name )
            highest_num =0
            if os .path .isdir (series_scan_dir ):
                self .logger (f"ℹ️ [Thread] Manga Date Mode: Scanning for existing files in '{series_scan_dir }'...")
                for dirpath ,_ ,filenames_in_dir in os .walk (series_scan_dir ):
                    for filename_to_check in filenames_in_dir :

                        prefix_to_check =clean_filename (self .manga_date_prefix .strip ())if self .manga_date_prefix and self .manga_date_prefix .strip ()else ""
                        name_part_to_match =filename_to_check
                        if prefix_to_check and name_part_to_match .startswith (prefix_to_check ):
                            name_part_to_match =name_part_to_match [len (prefix_to_check ):].lstrip ()

                        base_name_no_ext =os .path .splitext (name_part_to_match )[0 ]
                        match =re .match (r"(\d+)",base_name_no_ext )
                        if match :highest_num =max (highest_num ,int (match .group (1 )))
            self .manga_date_file_counter_ref =[highest_num +1 ,threading .Lock ()]
            self .logger (f"ℹ️ [Thread] Manga Date Mode: Initialized date-based counter at {self .manga_date_file_counter_ref [0 ]}.")

            # ... (existing manga counter initialization logic) ...
            pass
        if self .manga_mode_active and self .manga_filename_style ==STYLE_POST_TITLE_GLOBAL_NUMBERING and not self .extract_links_only and self .manga_global_file_counter_ref is None :
            self .manga_global_file_counter_ref =[1 ,threading .Lock ()]
            self .logger (f"ℹ️ [Thread] Manga Title+GlobalNum Mode: Initialized global counter at {self .manga_global_file_counter_ref [0 ]}.")
        worker_signals_obj = PostProcessorSignals ()
        # ... (existing manga counter initialization logic) ...
        pass

        worker_signals_obj = PostProcessorSignals()
        try :
            worker_signals_obj .progress_signal .connect (self .progress_signal )
            worker_signals_obj .file_download_status_signal .connect (self .file_download_status_signal )
            worker_signals_obj .file_progress_signal .connect (self .file_progress_signal )
            worker_signals_obj .external_link_signal .connect (self .external_link_signal )
            worker_signals_obj .missed_character_post_signal .connect (self .missed_character_post_signal )
            worker_signals_obj .file_successfully_downloaded_signal .connect (self .file_successfully_downloaded_signal )
            self .logger (" Starting post fetch (single-threaded download process)...")
            post_generator =download_from_api (
                self .api_url_input ,
                logger =self .logger ,
                start_page =self .start_page ,
                end_page =self .end_page ,
                manga_mode =self .manga_mode_active ,
                cancellation_event =self .cancellation_event ,
                pause_event =self .pause_event ,
                use_cookie =self .use_cookie ,
                cookie_text =self .cookie_text ,
                selected_cookie_file =self .selected_cookie_file ,
                app_base_dir =self .app_base_dir ,
                manga_filename_style_for_sort_check =self .manga_filename_style if self .manga_mode_active else None
            # Connect signals
            worker_signals_obj.progress_signal.connect(self.progress_signal)
            worker_signals_obj.file_download_status_signal.connect(self.file_download_status_signal)
            worker_signals_obj.file_progress_signal.connect(self.file_progress_signal)
            worker_signals_obj.external_link_signal.connect(self.external_link_signal)
            worker_signals_obj.missed_character_post_signal.connect(self.missed_character_post_signal)
            worker_signals_obj.file_successfully_downloaded_signal.connect(self.file_successfully_downloaded_signal)
            worker_signals_obj.worker_finished_signal.connect(lambda result: None)  # Connect to dummy lambda to avoid errors

            self.logger(" Starting post fetch (single-threaded download process)...")
            post_generator = download_from_api(
                self.api_url_input,
                logger=self.logger,
                start_page=self.start_page,
                end_page=self.end_page,
                manga_mode=self.manga_mode_active,
                cancellation_event=self.cancellation_event,
                pause_event=self.pause_event,
                use_cookie=self.use_cookie,
                cookie_text=self.cookie_text,
                selected_cookie_file=self.selected_cookie_file,
                app_base_dir=self.app_base_dir,
                manga_filename_style_for_sort_check=self.manga_filename_style if self.manga_mode_active else None
            )
            for posts_batch_data in post_generator :
                if self ._check_pause_self ("Post batch processing"):was_process_cancelled =True ;break
                if self .isInterruptionRequested ():was_process_cancelled =True ;break
                for individual_post_data in posts_batch_data :
                    if self ._check_pause_self (f"Individual post processing for {individual_post_data .get ('id','N/A')}"):was_process_cancelled =True ;break
                    if self .isInterruptionRequested ():was_process_cancelled =True ;break
                    post_processing_worker =PostProcessorWorker (
                        post_data =individual_post_data ,
                        download_root =self .output_dir ,
                        known_names =self .known_names ,
                        filter_character_list =self .filter_character_list_objects_initial ,
                        dynamic_character_filter_holder =self .dynamic_filter_holder ,
                        unwanted_keywords =self .unwanted_keywords ,
                        filter_mode =self .filter_mode ,
                        skip_zip =self .skip_zip ,skip_rar =self .skip_rar ,
                        use_subfolders =self .use_subfolders ,use_post_subfolders =self .use_post_subfolders ,
                        target_post_id_from_initial_url =self .initial_target_post_id ,
                        custom_folder_name =self .custom_folder_name ,
                        compress_images =self .compress_images ,download_thumbnails =self .download_thumbnails ,
                        service =self .service ,user_id =self .user_id ,
                        api_url_input =self .api_url_input ,
                        pause_event =self .pause_event ,
                        cancellation_event =self .cancellation_event ,
                        emitter =worker_signals_obj ,
                        downloaded_files =self .downloaded_files ,
                        downloaded_file_hashes =self .downloaded_file_hashes ,
                        downloaded_files_lock =self .downloaded_files_lock ,
                        downloaded_file_hashes_lock =self .downloaded_file_hashes_lock ,
                        skip_words_list =self .skip_words_list ,
                        skip_words_scope =self .skip_words_scope ,
                        show_external_links =self .show_external_links ,
                        extract_links_only =self .extract_links_only ,
                        num_file_threads =self .num_file_threads_for_worker ,
                        skip_current_file_flag =self .skip_current_file_flag ,
                        manga_mode_active =self .manga_mode_active ,
                        manga_filename_style =self .manga_filename_style ,
                        manga_date_prefix =self .manga_date_prefix ,
                        char_filter_scope =self .char_filter_scope ,
                        remove_from_filename_words_list =self .remove_from_filename_words_list ,
                        allow_multipart_download =self .allow_multipart_download ,
                        selected_cookie_file =self .selected_cookie_file ,
                        app_base_dir =self .app_base_dir ,
                        cookie_text =self .cookie_text ,
                        override_output_dir =self .override_output_dir ,
                        manga_global_file_counter_ref =self .manga_global_file_counter_ref ,
                        use_cookie =self .use_cookie ,
                        manga_date_file_counter_ref =self .manga_date_file_counter_ref ,
                        use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder,
                        keep_in_post_duplicates=self.keep_in_post_duplicates,
                        creator_download_folder_ignore_words =self .creator_download_folder_ignore_words ,
                        session_file_path=self.session_file_path,
                        session_lock=self.session_lock,

            for posts_batch_data in post_generator:
                if self.isInterruptionRequested():
                    was_process_cancelled = True
                    break
                for individual_post_data in posts_batch_data:
                    if self.isInterruptionRequested():
                        was_process_cancelled = True
                        break

                    # Create the worker, now correctly passing single_pdf_mode
                    post_processing_worker = PostProcessorWorker(
                        post_data=individual_post_data,
                        download_root=self.output_dir,
                        known_names=self.known_names,
                        filter_character_list=self.filter_character_list_objects_initial,
                        dynamic_character_filter_holder=self.dynamic_filter_holder,
                        unwanted_keywords=self.unwanted_keywords,
                        filter_mode=self.filter_mode,
                        skip_zip=self.skip_zip, skip_rar=self.skip_rar,
                        use_subfolders=self.use_subfolders, use_post_subfolders=self.use_post_subfolders,
                        target_post_id_from_initial_url=self.initial_target_post_id,
                        custom_folder_name=self.custom_folder_name,
                        compress_images=self.compress_images, download_thumbnails=self.download_thumbnails,
                        service=self.service, user_id=self.user_id,
                        api_url_input=self.api_url_input,
                        pause_event=self.pause_event,
                        cancellation_event=self.cancellation_event,
                        emitter=worker_signals_obj,
                        downloaded_files=self.downloaded_files,
                        downloaded_file_hashes=self.downloaded_file_hashes,
                        downloaded_files_lock=self.downloaded_files_lock,
                        downloaded_file_hashes_lock=self.downloaded_file_hashes_lock,
                        skip_words_list=self.skip_words_list,
                        skip_words_scope=self.skip_words_scope,
                        show_external_links=self.show_external_links,
                        extract_links_only=self.extract_links_only,
                        num_file_threads=self.num_file_threads_for_worker,
                        skip_current_file_flag=self.skip_current_file_flag,
                        manga_mode_active=self.manga_mode_active,
                        manga_filename_style=self.manga_filename_style,
                        manga_date_prefix=self.manga_date_prefix,
                        char_filter_scope=self.char_filter_scope,
                        remove_from_filename_words_list=self.remove_from_filename_words_list,
                        allow_multipart_download=self.allow_multipart_download,
                        selected_cookie_file=self.selected_cookie_file,
                        app_base_dir=self.app_base_dir,
                        cookie_text=self.cookie_text,
                        override_output_dir=self.override_output_dir,
                        manga_global_file_counter_ref=self.manga_global_file_counter_ref,
                        use_cookie=self.use_cookie,
                        manga_date_file_counter_ref=self.manga_date_file_counter_ref,
                        use_date_prefix_for_subfolder=self.use_date_prefix_for_subfolder,
                        keep_in_post_duplicates=self.keep_in_post_duplicates,
                        creator_download_folder_ignore_words=self.creator_download_folder_ignore_words,
                        session_file_path=self.session_file_path,
                        session_lock=self.session_lock,
                        text_only_scope=self.text_only_scope,
                        text_export_format=self.text_export_format,
                        single_pdf_mode=self.single_pdf_mode,  # <-- This is now correctly passed
                        project_root_dir=self.project_root_dir
                    )
                    try :
                        dl_count ,skip_count ,kept_originals_this_post ,retryable_failures ,permanent_failures ,history_data =post_processing_worker .process ()
                        grand_total_downloaded_files +=dl_count
                        grand_total_skipped_files +=skip_count
                        if kept_originals_this_post :
                            grand_list_of_kept_original_filenames .extend (kept_originals_this_post )
                        if retryable_failures :
                            self .retryable_file_failed_signal .emit (retryable_failures )
                        if history_data :
                            if len (self .history_candidates_buffer )<8 :
                                self .post_processed_for_history_signal .emit (history_data )
                        if permanent_failures :
                            self .permanent_file_failed_signal .emit (permanent_failures )
                    except Exception as proc_err :
                        post_id_for_err =individual_post_data .get ('id','N/A')
                        self .logger (f"❌ Error processing post {post_id_for_err } in DownloadThread: {proc_err }")
                        traceback .print_exc ()
                        num_potential_files_est =len (individual_post_data .get ('attachments',[]))+(1 if individual_post_data .get ('file')else 0 )
                        grand_total_skipped_files +=num_potential_files_est
                    if self .skip_current_file_flag and self .skip_current_file_flag .is_set ():
                        self .skip_current_file_flag .clear ()
                        self .logger (" Skip current file flag was processed and cleared by DownloadThread.")
                    self .msleep (10 )
                if was_process_cancelled :break
            if not was_process_cancelled and not self .isInterruptionRequested ():
                self .logger ("✅ All posts processed or end of content reached by DownloadThread.")
                    try:
                        # Correctly unpack the 7 values returned from the worker
                        (dl_count, skip_count, kept_originals_this_post,
                         retryable_failures, permanent_failures,
                         history_data, temp_filepath) = post_processing_worker.process()

                        grand_total_downloaded_files += dl_count
                        grand_total_skipped_files += skip_count

                        if kept_originals_this_post:
                            grand_list_of_kept_original_filenames.extend(kept_originals_this_post)
                        if retryable_failures:
                            self.retryable_file_failed_signal.emit(retryable_failures)
                        if history_data:
                            if len(self.history_candidates_buffer) < 8:
                                self.post_processed_for_history_signal.emit(history_data)
                        if permanent_failures:
                            self.permanent_file_failed_signal.emit(permanent_failures)

                        # In single-threaded text mode, pass the temp file path back to the main window
                        if self.single_pdf_mode and temp_filepath:
                            self.progress_signal.emit(f"TEMP_FILE_PATH:{temp_filepath}")

        except Exception as main_thread_err :
            self .logger (f"\n❌ Critical error within DownloadThread run loop: {main_thread_err }")
            traceback .print_exc ()
            if not self .isInterruptionRequested ():was_process_cancelled =False
        finally :
            try :
                if worker_signals_obj :
                    worker_signals_obj .progress_signal .disconnect (self .progress_signal )
                    worker_signals_obj .file_download_status_signal .disconnect (self .file_download_status_signal )
                    worker_signals_obj .external_link_signal .disconnect (self .external_link_signal )
                    worker_signals_obj .file_progress_signal .disconnect (self .file_progress_signal )
                    worker_signals_obj .missed_character_post_signal .disconnect (self .missed_character_post_signal )
                    worker_signals_obj .file_successfully_downloaded_signal .disconnect (self .file_successfully_downloaded_signal )
                    except Exception as proc_err:
                        post_id_for_err = individual_post_data.get('id', 'N/A')
                        self.logger(f"❌ Error processing post {post_id_for_err} in DownloadThread: {proc_err}")
                        traceback.print_exc()
                        num_potential_files_est = len(individual_post_data.get('attachments', [])) + (1 if individual_post_data.get('file') else 0)
                        grand_total_skipped_files += num_potential_files_est

                    if self.skip_current_file_flag and self.skip_current_file_flag.is_set():
                        self.skip_current_file_flag.clear()
                        self.logger(" Skip current file flag was processed and cleared by DownloadThread.")
                    self.msleep(10)
                if was_process_cancelled:
                    break
            if not was_process_cancelled and not self.isInterruptionRequested():
                self.logger("✅ All posts processed or end of content reached by DownloadThread.")

        except Exception as main_thread_err:
            self.logger(f"\n❌ Critical error within DownloadThread run loop: {main_thread_err}")
            traceback.print_exc()
        finally:
            try:
                # Disconnect signals
                if worker_signals_obj:
                    worker_signals_obj.progress_signal.disconnect(self.progress_signal)
                    worker_signals_obj.file_download_status_signal.disconnect(self.file_download_status_signal)
                    worker_signals_obj.external_link_signal.disconnect(self.external_link_signal)
                    worker_signals_obj.file_progress_signal.disconnect(self.file_progress_signal)
                    worker_signals_obj.missed_character_post_signal.disconnect(self.missed_character_post_signal)
                    worker_signals_obj.file_successfully_downloaded_signal.disconnect(self.file_successfully_downloaded_signal)
            except (TypeError, RuntimeError) as e:
                self.logger(f"ℹ️ Note during DownloadThread signal disconnection: {e}")

            # Emit the final signal with all collected results
            self.finished_signal.emit(grand_total_downloaded_files, grand_total_skipped_files, self.isInterruptionRequested(), grand_list_of_kept_original_filenames)

        except (TypeError ,RuntimeError )as e :
            self .logger (f"ℹ️ Note during DownloadThread signal disconnection: {e }")
        self .finished_signal .emit (grand_total_downloaded_files ,grand_total_skipped_files ,self .isInterruptionRequested (),grand_list_of_kept_original_filenames )
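For context, a hedged sketch of how the TEMP_FILE_PATH sentinel emitted above could be consumed on the receiving side; the slot and collector names are made up, and the actual main-window handler is not part of this diff:

    # Sketch: a progress_signal receiver that separates the sentinel from
    # ordinary log lines.
    single_pdf_parts = []  # hypothetical collector

    def on_progress(message):  # hypothetical slot for progress_signal
        prefix = "TEMP_FILE_PATH:"
        if message.startswith(prefix):
            single_pdf_parts.append(message[len(prefix):])
        else:
            print(message)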
|
||||
def receive_add_character_result (self ,result ):
|
||||
with QMutexLocker (self .prompt_mutex ):
|
||||
self ._add_character_response =result
|
||||
|
||||
@ -144,7 +144,7 @@ class EmptyPopupDialog (QDialog ):
|
||||
self .setMinimumSize (int (400 *scale_factor ),int (300 *scale_factor ))
|
||||
|
||||
self .parent_app =parent_app_ref
|
||||
self .current_scope_mode =self .SCOPE_CHARACTERS
|
||||
self.current_scope_mode = self.SCOPE_CREATORS
|
||||
self .app_base_dir =app_base_dir
|
||||
|
||||
app_icon =get_app_icon_object ()
|
||||
|
||||
@@ -126,6 +126,21 @@ class FavoriteArtistsDialog(QDialog):
         self.artist_list_widget.setVisible(show)
 
     def _fetch_favorite_artists(self):
 
+        if self.cookies_config['use_cookie']:
+            # Check if we can load cookies for at least one of the services.
+            kemono_cookies = prepare_cookies_for_request(True, self.cookies_config['cookie_text'], self.cookies_config['selected_cookie_file'], self.cookies_config['app_base_dir'], self._logger, target_domain="kemono.su")
+            coomer_cookies = prepare_cookies_for_request(True, self.cookies_config['cookie_text'], self.cookies_config['selected_cookie_file'], self.cookies_config['app_base_dir'], self._logger, target_domain="coomer.su")
+
+            if not kemono_cookies and not coomer_cookies:
+                # If cookies are enabled but none could be loaded, show help and stop.
+                self.status_label.setText(self._tr("fav_artists_cookies_required_status", "Error: Cookies enabled but could not be loaded for any source."))
+                self._logger("Error: Cookies enabled but no valid cookies were loaded. Showing help dialog.")
+                cookie_help_dialog = CookieHelpDialog(self.parent_app, self)
+                cookie_help_dialog.exec_()
+                self.download_button.setEnabled(False)
+                return  # Stop further execution
+
         kemono_fav_url = "https://kemono.su/api/v1/account/favorites?type=artist"
         coomer_fav_url = "https://coomer.su/api/v1/account/favorites?type=artist"
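Once cookies are available, a favorites lookup against either endpoint reduces to a cookie-authenticated GET. The following is a sketch using the requests library; the exact JSON shape returned by these endpoints is not shown in this diff and is an assumption:

import requests

def fetch_favorite_artists(fav_url, cookies, logger=print):
    # Sketch: query a */api/v1/account/favorites?type=artist endpoint.
    # Assumes the endpoint returns a JSON list of favorite records.
    try:
        resp = requests.get(fav_url, cookies=cookies, timeout=30)
        resp.raise_for_status()
        return resp.json()
    except requests.RequestException as e:
        logger(f"Failed to fetch favorites from {fav_url}: {e}")
        return []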
83
src/ui/dialogs/MoreOptionsDialog.py
Normal file
@@ -0,0 +1,83 @@
from PyQt5.QtWidgets import (
    QDialog, QVBoxLayout, QRadioButton, QDialogButtonBox, QButtonGroup, QLabel, QComboBox, QHBoxLayout, QCheckBox
)
from PyQt5.QtCore import Qt


class MoreOptionsDialog(QDialog):
    """
    A dialog for selecting a scope, export format, and single PDF option.
    """
    SCOPE_CONTENT = "content"
    SCOPE_COMMENTS = "comments"

    def __init__(self, parent=None, current_scope=None, current_format=None, single_pdf_checked=False):
        super().__init__(parent)
        self.setWindowTitle("More Options")
        self.setMinimumWidth(350)

        layout = QVBoxLayout(self)
        self.description_label = QLabel("Please choose the scope for the action:")
        layout.addWidget(self.description_label)
        self.radio_button_group = QButtonGroup(self)
        self.radio_content = QRadioButton("Description/Content")
        self.radio_comments = QRadioButton("Comments")
        self.radio_button_group.addButton(self.radio_content)
        self.radio_button_group.addButton(self.radio_comments)
        layout.addWidget(self.radio_content)
        layout.addWidget(self.radio_comments)

        if current_scope == self.SCOPE_COMMENTS:
            self.radio_comments.setChecked(True)
        else:
            self.radio_content.setChecked(True)

        export_layout = QHBoxLayout()
        export_label = QLabel("Export as:")
        self.format_combo = QComboBox()
        self.format_combo.addItems(["PDF", "DOCX", "TXT"])

        if current_format and current_format.upper() in ["PDF", "DOCX", "TXT"]:
            self.format_combo.setCurrentText(current_format.upper())
        else:
            self.format_combo.setCurrentText("PDF")

        export_layout.addWidget(export_label)
        export_layout.addWidget(self.format_combo)
        export_layout.addStretch()
        layout.addLayout(export_layout)

        # --- Single PDF checkbox ---
        self.single_pdf_checkbox = QCheckBox("Single PDF")
        self.single_pdf_checkbox.setToolTip("If checked, all text from matching posts will be compiled into one single PDF file.")
        self.single_pdf_checkbox.setChecked(single_pdf_checked)
        layout.addWidget(self.single_pdf_checkbox)

        self.format_combo.currentTextChanged.connect(self.update_single_pdf_checkbox_state)
        self.update_single_pdf_checkbox_state(self.format_combo.currentText())

        self.button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
        self.button_box.accepted.connect(self.accept)
        self.button_box.rejected.connect(self.reject)
        layout.addWidget(self.button_box)
        self.setLayout(layout)

    def update_single_pdf_checkbox_state(self, text):
        """Enable the Single PDF checkbox only if the format is PDF."""
        is_pdf = (text.upper() == "PDF")
        self.single_pdf_checkbox.setEnabled(is_pdf)
        if not is_pdf:
            self.single_pdf_checkbox.setChecked(False)

    def get_selected_scope(self):
        if self.radio_comments.isChecked():
            return self.SCOPE_COMMENTS
        return self.SCOPE_CONTENT

    def get_selected_format(self):
        return self.format_combo.currentText().lower()

    def get_single_pdf_state(self):
        """Returns the state of the Single PDF checkbox."""
        return self.single_pdf_checkbox.isChecked() and self.single_pdf_checkbox.isEnabled()
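A typical call site (illustrative only; no caller for this dialog appears in this diff) would read:

dialog = MoreOptionsDialog(parent=self,
                           current_scope=MoreOptionsDialog.SCOPE_CONTENT,
                           current_format="pdf",
                           single_pdf_checked=False)
if dialog.exec_() == QDialog.Accepted:
    scope = dialog.get_selected_scope()           # "content" or "comments"
    export_format = dialog.get_selected_format()  # "pdf", "docx", or "txt"
    single_pdf = dialog.get_single_pdf_state()    # True only when the format is PDF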
77
src/ui/dialogs/SinglePDF.py
Normal file
@@ -0,0 +1,77 @@
# SinglePDF.py

import os
try:
    from fpdf import FPDF
    FPDF_AVAILABLE = True
except ImportError:
    FPDF_AVAILABLE = False
    FPDF = object  # Placeholder base class so PDF below can still be defined


class PDF(FPDF):
    """Custom PDF class to handle headers and footers."""
    body_font = 'DejaVu'  # Swapped for a built-in font if DejaVu cannot be loaded

    def header(self):
        # No header
        pass

    def footer(self):
        # Position at 1.5 cm from the bottom
        self.set_y(-15)
        self.set_font(self.body_font, '', 8)
        # Centered page number
        self.cell(0, 10, 'Page ' + str(self.page_no()), 0, 0, 'C')


def create_single_pdf_from_content(posts_data, output_filename, font_path, logger=print):
    """
    Creates a single PDF from a list of post titles and content.

    Args:
        posts_data (list): A list of dictionaries, where each dict has 'title' and 'content' keys.
        output_filename (str): The full path for the output PDF file.
        font_path (str): Path to the DejaVuSans.ttf font file.
        logger (function, optional): A function to log progress and errors. Defaults to print.
    """
    if not FPDF_AVAILABLE:
        logger("❌ PDF Creation failed: 'fpdf2' library is not installed. Please run: pip install fpdf2")
        return False

    if not posts_data:
        logger("   No text content was collected to create a PDF.")
        return False

    pdf = PDF()

    try:
        if not os.path.exists(font_path):
            raise RuntimeError("Font file not found.")
        pdf.add_font('DejaVu', '', font_path, uni=True)
        pdf.add_font('DejaVu', 'B', font_path, uni=True)  # Reuse the regular face for the bold style
    except Exception as font_error:
        logger(f"   ⚠️ Could not load DejaVu font: {font_error}")
        logger("   PDF may not support all characters. Falling back to the built-in Arial font.")
        pdf.body_font = 'Arial'  # Core font, so no add_font() call is needed

    logger(f"   Starting PDF creation with content from {len(posts_data)} posts...")

    for post in posts_data:
        pdf.add_page()
        # Post title; align='L' is set explicitly and no positional x/y arguments are passed
        pdf.set_font(pdf.body_font, 'B', 16)
        pdf.multi_cell(w=0, h=10, text=post.get('title', 'Untitled Post'), align='L')
        pdf.ln(5)  # A little space after the title

        # Post content
        pdf.set_font(pdf.body_font, '', 12)
        pdf.multi_cell(w=0, h=7, text=post.get('content', 'No Content'))

    try:
        pdf.output(output_filename)
        logger(f"✅ Successfully created single PDF: '{os.path.basename(output_filename)}'")
        return True
    except Exception as e:
        logger(f"❌ A critical error occurred while saving the final PDF: {e}")
        return False
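A minimal call, assuming the font ships under the repository's data/dejavu-sans directory (the exact font filename and output path here are assumptions, not taken from this diff):

posts = [
    {'title': 'Post 1', 'content': 'First post text...'},
    {'title': 'Post 2', 'content': 'Second post text...'},
]
ok = create_single_pdf_from_content(
    posts,
    output_filename='compiled_posts.pdf',
    font_path='data/dejavu-sans/DejaVuSans.ttf',  # assumed bundled font location
)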
File diff suppressed because it is too large