This commit is contained in:
Yuvi9587 2025-05-29 17:56:16 +05:30
parent 78357df07f
commit eed0a919aa
2 changed files with 201 additions and 33 deletions

View File

@ -153,10 +153,13 @@ def clean_filename(name):
return final_name if final_name else "untitled_file"
def strip_html_tags(html_text):
    """Return *html_text* as plain text: tags removed, whitespace collapsed.

    Args:
        html_text: HTML markup to clean. Falsy values (``None``, ``""``, ``0``)
            yield an empty string; any other non-string value is converted
            with ``str()`` first.

    Returns:
        The text with every ``<...>`` tag replaced by a space, HTML entities
        decoded, runs of whitespace collapsed to a single space, and
        leading/trailing whitespace stripped.
    """
    if not html_text:
        return ""
    # Strip tags BEFORE decoding entities. Decoding first would turn escaped
    # literal text such as "&lt; 5 and &gt;" into something that looks like a
    # tag, and the tag regex would then delete real content.
    # Each tag becomes a space so that text from adjacent elements
    # ("<p>a</p><p>b</p>") does not get glued together.
    without_tags = re.sub(r'<[^>]+>', ' ', str(html_text))
    decoded_text = html.unescape(without_tags)
    # Collapse newlines, tabs, non-breaking spaces (from &nbsp;) and the
    # spaces introduced above into single spaces.
    return re.sub(r'\s+', ' ', decoded_text).strip()
def extract_folder_name_from_title(title, unwanted_keywords):
if not title: return 'Uncategorized'
title_lower = title.lower()
@ -558,7 +561,7 @@ def get_link_platform(url):
class PostProcessorSignals(QObject):
    """Qt signals exposed for post-processing workers.

    Each signal is declared exactly once; the stale four-argument
    ``external_link_signal`` declaration is gone, leaving only the
    five-argument form that carries the decryption key.
    """
    progress_signal = pyqtSignal(str)  # presumably a log/progress message -- confirm against emitters
    file_download_status_signal = pyqtSignal(bool)  # NOTE(review): meaning of the flag is not visible here
    # post_title, link_text, link_url, platform, decryption_key ("" when none was found)
    external_link_signal = pyqtSignal(str, str, str, str, str)
    file_progress_signal = pyqtSignal(str, object)  # NOTE(review): payload shape is not visible here
    missed_character_post_signal = pyqtSignal(str, str)  # post_title, reason
class PostProcessorWorker:
@ -1211,6 +1214,8 @@ class PostProcessorWorker:
if (self.show_external_links or self.extract_links_only) and post_content_html: # type: ignore
if self._check_pause(f"External link extraction for post {post_id}"): return 0, num_potential_files_in_post, [], []
try:
# Regex for typical Mega decryption keys (43 or 22 chars, alphanumeric + hyphen/underscore)
mega_key_pattern = re.compile(r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b')
unique_links_data = {}
for match in link_pattern.finditer(post_content_html):
link_url = match.group(1).strip()
@ -1227,8 +1232,27 @@ class PostProcessorWorker:
scraped_platforms = {'kemono', 'coomer', 'patreon'}
for link_url, link_text in unique_links_data.items():
platform = get_link_platform(link_url)
decryption_key_found = ""
if platform == 'mega':
# 1. Check if key is in the URL fragment
parsed_mega_url = urlparse(link_url)
if parsed_mega_url.fragment:
potential_key_from_fragment = parsed_mega_url.fragment.split('!')[-1] # Handle cases like #!key or #key
if mega_key_pattern.fullmatch(potential_key_from_fragment):
decryption_key_found = potential_key_from_fragment
# 2. If not in fragment, search in link text
if not decryption_key_found and link_text:
key_match_in_text = mega_key_pattern.search(link_text)
if key_match_in_text:
decryption_key_found = key_match_in_text.group(1)
# 3. If still not found, search the whole post content (if extracting links only, as it's more critical)
if not decryption_key_found and self.extract_links_only and post_content_html:
key_match_in_content = mega_key_pattern.search(strip_html_tags(post_content_html)) # Search cleaned content
if key_match_in_content:
decryption_key_found = key_match_in_content.group(1)
if platform not in scraped_platforms:
self._emit_signal('external_link', post_title, link_text, link_url, platform)
self._emit_signal('external_link', post_title, link_text, link_url, platform, decryption_key_found or "")
links_emitted_count +=1
if links_emitted_count > 0: self.logger(f" 🔗 Found {links_emitted_count} potential external link(s) in post content.")
except Exception as e: self.logger(f"⚠️ Error parsing post content for links: {e}\n{traceback.format_exc(limit=2)}")
@ -1479,8 +1503,8 @@ class DownloadThread(QThread):
progress_signal = pyqtSignal(str) # Already QObject, no need to change
add_character_prompt_signal = pyqtSignal(str)
file_download_status_signal = pyqtSignal(bool)
finished_signal = pyqtSignal(int, int, bool, list)
external_link_signal = pyqtSignal(str, str, str, str)
finished_signal = pyqtSignal(int, int, bool, list) # total_downloaded, total_skipped, cancelled_by_user, kept_original_names_list
external_link_signal = pyqtSignal(str, str, str, str, str) # post_title, link_text, link_url, platform, decryption_key
file_progress_signal = pyqtSignal(str, object)
retryable_file_failed_signal = pyqtSignal(list) # New: list of retry_details dicts
missed_character_post_signal = pyqtSignal(str, str) # New: post_title, reason

182
main.py
View File

@ -71,7 +71,7 @@ except ImportError as e:
class _MockPostProcessorSignals(QObject):
progress_signal = pyqtSignal(str)
file_download_status_signal = pyqtSignal(bool)
external_link_signal = pyqtSignal(str, str, str, str)
external_link_signal = pyqtSignal(str, str, str, str, str) # Added decryption_key
file_progress_signal = pyqtSignal(str, object)
missed_character_post_signal = pyqtSignal(str, str)
def __init__(self, parent=None):
@ -232,6 +232,86 @@ class ConfirmAddAllDialog(QDialog):
return CONFIRM_ADD_ALL_SKIP_ADDING
return self.user_choice
class CookieHelpDialog(QDialog):
    """Modal dialog that explains how to obtain a cookies.txt file.

    With ``offer_download_without_option=True`` the dialog shows a
    "Download without Cookies" / "Cancel Download" button pair; otherwise it
    shows a single "OK" button. The button that was pressed is recorded in
    ``user_choice`` as one of the ``CHOICE_*`` constants. ``user_choice``
    remains ``None`` if the dialog is dismissed without pressing a button.
    """

    # Values stored in ``user_choice`` by the button handlers.
    CHOICE_PROCEED_WITHOUT_COOKIES = 1
    CHOICE_CANCEL_DOWNLOAD = 2
    CHOICE_OK_INFO_ONLY = 3

    def __init__(self, parent=None, offer_download_without_option=False):
        """Build the instruction label and the button row.

        Args:
            parent: Optional parent widget. If it provides ``get_dark_theme()``,
                the returned stylesheet is applied to this dialog.
            offer_download_without_option: When true, offer the
                proceed/cancel button pair instead of a plain "OK" button.
        """
        super().__init__(parent)
        self.setWindowTitle("Cookie File Instructions")
        self.setModal(True)
        self.offer_download_without_option = offer_download_without_option
        self.user_choice = None  # Filled in by the button handlers

        root_layout = QVBoxLayout(self)

        help_html = """
<p>To use cookies, you typically need a <b>cookies.txt</b> file from your browser.</p>
<p><b>How to get cookies.txt:</b></p>
<ol>
<li>Install the 'Get cookies.txt LOCALLY' extension for your Chrome-based browser:
<br><a href="https://chromewebstore.google.com/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc" style="color: #87CEEB;">Get cookies.txt LOCALLY on Chrome Web Store</a>
</li>
<li>Go to the website (e.g., kemono.su or coomer.su) and log in if necessary.</li>
<li>Click the extension's icon in your browser toolbar.</li>
<li>Click an 'Export' button (e.g., "Export As", "Export cookies.txt" - the exact wording might vary depending on the extension version).</li>
<li>Save the downloaded <code>cookies.txt</code> file to your computer.</li>
<li>In this application:
<ul>
<li>Ensure the 'Use Cookie' checkbox is checked.</li>
<li>Click the 'Browse...' button next to the cookie text field.</li>
<li>Select the <code>cookies.txt</code> file you just saved.</li>
</ul>
</li>
</ol>
<p>Alternatively, some extensions might allow you to copy the cookie string directly. If so, you can paste it into the text field instead of browsing for a file.</p>
"""
        help_label = QLabel(help_html)
        help_label.setTextFormat(Qt.RichText)
        help_label.setOpenExternalLinks(True)  # Let the store link open in the browser
        help_label.setWordWrap(True)
        root_layout.addWidget(help_label)

        button_row = QHBoxLayout()
        button_row.addStretch(1)  # Leading stretch pushes the buttons to the right
        if not self.offer_download_without_option:
            self.ok_button = self._add_button(button_row, "OK", self._ok_info_only)
        else:
            self.download_without_button = self._add_button(
                button_row, "Download without Cookies", self._proceed_without_cookies
            )
            self.cancel_button = self._add_button(
                button_row, "Cancel Download", self._cancel_download
            )
        root_layout.addLayout(button_row)

        if parent and hasattr(parent, 'get_dark_theme'):
            self.setStyleSheet(parent.get_dark_theme())
        self.setMinimumWidth(500)

    @staticmethod
    def _add_button(layout, caption, on_click):
        """Create a push button wired to *on_click*, append it to *layout*, and return it."""
        button = QPushButton(caption)
        button.clicked.connect(on_click)
        layout.addWidget(button)
        return button

    def _close_with(self, choice, accepted):
        """Record *choice* in ``user_choice`` and close the dialog as accepted or rejected."""
        self.user_choice = choice
        if accepted:
            self.accept()
        else:
            self.reject()

    def _proceed_without_cookies(self):
        """Handler for "Download without Cookies": accept the dialog."""
        self._close_with(self.CHOICE_PROCEED_WITHOUT_COOKIES, True)

    def _cancel_download(self):
        """Handler for "Cancel Download": reject the dialog."""
        self._close_with(self.CHOICE_CANCEL_DOWNLOAD, False)

    def _ok_info_only(self):
        """Handler for "OK": accept the dialog."""
        self._close_with(self.CHOICE_OK_INFO_ONLY, True)
class KnownNamesFilterDialog(QDialog):
"""A dialog to select names from Known.txt to add to the filter input."""
def __init__(self, known_names_list, parent=None):
@ -414,15 +494,16 @@ class FavoriteArtistsDialog(QDialog):
self.cookies_config['app_base_dir'],
self._logger
)
if self.cookies_config['use_cookie'] and not cookies_dict:
self.status_label.setText("Error: Cookies enabled but could not be loaded. Cannot fetch favorites.")
self._show_content_elements(False)
self._logger("Error: Cookies enabled but could not be loaded.")
QMessageBox.warning(self, "Cookie Error", "Cookies are enabled, but no valid cookies could be loaded. Please check your cookie settings or file.")
self._logger("Error: Cookies enabled but could not be loaded. Showing help dialog.")
cookie_help_dialog = CookieHelpDialog(self)
cookie_help_dialog.exec_()
self.download_button.setEnabled(False)
return
try:
headers = {'User-Agent': 'Mozilla/5.0'}
response = requests.get(fav_url, headers=headers, cookies=cookies_dict, timeout=20)
@ -549,7 +630,7 @@ class FavoritePostsFetcherThread(QThread):
)
if self.cookies_config['use_cookie'] and not cookies_dict:
self.finished.emit([], "Error: Cookies enabled but could not be loaded.")
self.finished.emit([], "COOKIES_REQUIRED_BUT_NOT_FOUND")
return
try:
@ -734,12 +815,18 @@ class FavoritePostsDialog(QDialog):
def _on_fetch_completed(self, fetched_posts_list, error_msg):
if error_msg:
if error_msg == "COOKIES_REQUIRED_BUT_NOT_FOUND":
self.status_label.setText("Error: Cookies are required for favorite posts but could not be loaded.")
self._logger("Error: Cookies required for favorite posts but not found. Showing help dialog.")
cookie_help_dialog = CookieHelpDialog(self)
cookie_help_dialog.exec_()
self.download_button.setEnabled(False) # Ensure it's disabled
else:
self.status_label.setText(error_msg)
self._logger(error_msg) # Log to main app log
QMessageBox.critical(self, "Fetch Error", error_msg)
# Keep download button disabled or handle as appropriate
self.download_button.setEnabled(False)
return
self.progress_bar.setVisible(False)
self.all_fetched_posts = fetched_posts_list
self._populate_post_list_widget() # This will now group and display
@ -1459,8 +1546,8 @@ class DownloaderApp(QWidget):
log_signal = pyqtSignal(str)
add_character_prompt_signal = pyqtSignal(str)
overall_progress_signal = pyqtSignal(int, int)
finished_signal = pyqtSignal(int, int, bool, list)
external_link_signal = pyqtSignal(str, str, str, str)
finished_signal = pyqtSignal(int, int, bool, list) # total_downloaded, total_skipped, cancelled_by_user, kept_original_names_list
external_link_signal = pyqtSignal(str, str, str, str, str) # post_title, link_text, link_url, platform, decryption_key
file_progress_signal = pyqtSignal(str, object)
@ -2648,8 +2735,8 @@ class DownloaderApp(QWidget):
multi_thread_active = fetcher_active or pool_has_active_tasks
return single_thread_active or multi_thread_active
def handle_external_link_signal(self, post_title, link_text, link_url, platform):
link_data = (post_title, link_text, link_url, platform)
def handle_external_link_signal(self, post_title, link_text, link_url, platform, decryption_key):
link_data = (post_title, link_text, link_url, platform, decryption_key)
self.external_link_queue.append(link_data)
if self.radio_only_links and self.radio_only_links.isChecked():
self.extracted_links_cache.append(link_data)
@ -2682,7 +2769,7 @@ class DownloaderApp(QWidget):
def _display_and_schedule_next(self, link_data):
post_title, link_text, link_url, platform = link_data
post_title, link_text, link_url, platform, decryption_key = link_data
is_only_links_mode = self.radio_only_links and self.radio_only_links.isChecked()
max_link_text_len = 35
@ -2690,6 +2777,9 @@ class DownloaderApp(QWidget):
formatted_link_info = f"{display_text} - {link_url} - {platform}"
separator = "-" * 45
if decryption_key:
formatted_link_info += f" (Decryption Key: {decryption_key})"
if is_only_links_mode:
if post_title != self._current_link_post_title:
self.log_signal.emit(HTML_PREFIX + "<br>" + separator + "<br>")
@ -2916,13 +3006,15 @@ class DownloaderApp(QWidget):
current_title_for_display = None
separator = "-" * 45
for post_title, link_text, link_url, platform in self.extracted_links_cache:
for post_title, link_text, link_url, platform, decryption_key in self.extracted_links_cache:
matches_search = (
not search_term or
search_term in link_text.lower() or
search_term in link_url.lower() or
search_term in platform.lower()
search_term in platform.lower() or
(decryption_key and search_term in decryption_key.lower())
)
if matches_search:
if post_title != current_title_for_display:
self.main_log_output.insertHtml("<br>" + separator + "<br>")
@ -2933,6 +3025,8 @@ class DownloaderApp(QWidget):
max_link_text_len = 35
display_text = link_text[:max_link_text_len].strip() + "..." if len(link_text) > max_link_text_len else link_text
formatted_link_info = f"{display_text} - {link_url} - {platform}"
if decryption_key:
formatted_link_info += f" (Decryption Key: {decryption_key})"
self.main_log_output.append(formatted_link_info)
if self.main_log_output.toPlainText().strip():
@ -2956,13 +3050,16 @@ class DownloaderApp(QWidget):
with open(filepath, 'w', encoding='utf-8') as f:
current_title_for_export = None
separator = "-" * 60 + "\n"
for post_title, link_text, link_url, platform in self.extracted_links_cache:
for post_title, link_text, link_url, platform, decryption_key in self.extracted_links_cache:
if post_title != current_title_for_export:
if current_title_for_export is not None:
f.write("\n" + separator + "\n")
f.write(f"Post Title: {post_title}\n\n")
current_title_for_export = post_title
f.write(f" {link_text} - {link_url} - {platform}\n")
line_to_write = f" {link_text} - {link_url} - {platform}"
if decryption_key:
line_to_write += f" (Decryption Key: {decryption_key})"
f.write(line_to_write + "\n")
self.log_signal.emit(f"✅ Links successfully exported to: {filepath}")
QMessageBox.information(self, "Export Successful", f"Links exported to:\n{filepath}")
except Exception as e:
@ -3665,7 +3762,35 @@ class DownloaderApp(QWidget):
use_cookie_from_checkbox = self.use_cookie_checkbox.isChecked() if hasattr(self, 'use_cookie_checkbox') else False
app_base_dir_for_cookies = os.path.dirname(self.config_file) # Directory of Known.txt
cookie_text_from_input = self.cookie_text_input.text().strip() if hasattr(self, 'cookie_text_input') and use_cookie_from_checkbox else ""
use_cookie_for_this_run = use_cookie_from_checkbox # Initialize with checkbox state
selected_cookie_file_path_for_backend = self.selected_cookie_filepath if use_cookie_from_checkbox and self.selected_cookie_filepath else None
if use_cookie_from_checkbox and not direct_api_url: # Don't show for individual items in favorite queue if they fail this check
# Perform an early check for cookies if 'Use Cookie' is checked for the main UI interaction
# The actual cookies for download/API calls will be prepared by the backend.
# This is for proactive UI feedback.
temp_cookies_for_check = prepare_cookies_for_request(
use_cookie_for_this_run, # Use the potentially modified flag
cookie_text_from_input,
selected_cookie_file_path_for_backend,
app_base_dir_for_cookies,
lambda msg: self.log_signal.emit(f"[UI Cookie Check] {msg}")
)
if temp_cookies_for_check is None:
cookie_dialog = CookieHelpDialog(self, offer_download_without_option=True)
dialog_exec_result = cookie_dialog.exec_()
if cookie_dialog.user_choice == CookieHelpDialog.CHOICE_PROCEED_WITHOUT_COOKIES and dialog_exec_result == QDialog.Accepted:
self.log_signal.emit(" User chose to download without cookies for this session.")
use_cookie_for_this_run = False # Override for this run
elif cookie_dialog.user_choice == CookieHelpDialog.CHOICE_CANCEL_DOWNLOAD or dialog_exec_result == QDialog.Rejected:
self.log_signal.emit("❌ Download cancelled by user at cookie prompt.")
return False
else: # Any other case, including closing the dialog via 'X' button
self.log_signal.emit("⚠️ Cookie dialog closed or unexpected choice. Aborting download.")
return False
current_skip_words_scope = self.get_skip_words_scope()
manga_mode_is_checked = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False
@ -4072,7 +4197,7 @@ class DownloaderApp(QWidget):
'selected_cookie_file': selected_cookie_file_path_for_backend, # Pass selected cookie file
'manga_global_file_counter_ref': manga_global_file_counter_ref_for_thread, # Pass new counter
'app_base_dir': app_base_dir_for_cookies, # Pass app base dir
'use_cookie': use_cookie_from_checkbox, # Pass cookie setting
'use_cookie': use_cookie_for_this_run, # Pass the potentially modified cookie setting
}
args_template['override_output_dir'] = override_output_dir # Pass override dir in template
@ -5514,7 +5639,12 @@ class DownloaderApp(QWidget):
if selected_artists:
if len(selected_artists) > 1:
display_names = ", ".join([artist['name'] for artist in selected_artists])
self.link_input.setText(display_names)
# For multiple artists, we don't set the link_input as it's confusing.
# The queue will handle individual URLs.
# self.link_input.setText(display_names) # Avoid setting this
if self.link_input: # Clear it if it was showing a single URL before
self.link_input.clear()
self.link_input.setPlaceholderText(f"{len(selected_artists)} favorite artists selected for download queue.")
self.log_signal.emit(f" Multiple favorite artists selected. Displaying names: {display_names}")
elif len(selected_artists) == 1:
self.link_input.setText(selected_artists[0]['url']) # Show the single URL
@ -5545,6 +5675,20 @@ class DownloaderApp(QWidget):
}
global KNOWN_NAMES # Ensure we have access to the global
# Perform cookie check before showing the FavoritePostsDialog if cookies are enabled
if cookies_config['use_cookie']:
temp_cookies_for_check = prepare_cookies_for_request(
cookies_config['use_cookie'],
cookies_config['cookie_text'],
cookies_config['selected_cookie_file'],
cookies_config['app_base_dir'],
lambda msg: self.log_signal.emit(f"[FavPosts Cookie Check] {msg}")
)
if temp_cookies_for_check is None:
cookie_help_dialog = CookieHelpDialog(self)
cookie_help_dialog.exec_()
return # Don't proceed to show FavoritePostsDialog if cookies are needed but not found
dialog = FavoritePostsDialog(self, cookies_config, KNOWN_NAMES) # Pass KNOWN_NAMES
if dialog.exec_() == QDialog.Accepted:
selected_posts = dialog.get_selected_posts()