diff --git a/downloader_utils.py b/downloader_utils.py index 7552d08..703b947 100644 --- a/downloader_utils.py +++ b/downloader_utils.py @@ -153,10 +153,13 @@ def clean_filename(name): return final_name if final_name else "untitled_file" def strip_html_tags(html_text): if not html_text: return "" - text = html.unescape(html_text) - clean_pattern = re.compile('<.*?>') - cleaned_text = re.sub(clean_pattern, '', text) - return cleaned_text.strip() + text = html.unescape(str(html_text)) # Ensure input is a string + # Replace HTML tags with a single space + text_after_tag_removal = re.sub(r'<[^>]+>', ' ', text) + # Replace multiple whitespace characters (including newlines, tabs, etc. that are now spaces) + # with a single space. Also, strip leading/trailing whitespace from the final result. + cleaned_text = re.sub(r'\s+', ' ', text_after_tag_removal).strip() + return cleaned_text def extract_folder_name_from_title(title, unwanted_keywords): if not title: return 'Uncategorized' title_lower = title.lower() @@ -558,7 +561,7 @@ def get_link_platform(url): class PostProcessorSignals(QObject): progress_signal = pyqtSignal(str) file_download_status_signal = pyqtSignal(bool) - external_link_signal = pyqtSignal(str, str, str, str) + external_link_signal = pyqtSignal(str, str, str, str, str) # Added decryption_key (str) file_progress_signal = pyqtSignal(str, object) missed_character_post_signal = pyqtSignal(str, str) # New: post_title, reason class PostProcessorWorker: @@ -1211,6 +1214,8 @@ class PostProcessorWorker: if (self.show_external_links or self.extract_links_only) and post_content_html: # type: ignore if self._check_pause(f"External link extraction for post {post_id}"): return 0, num_potential_files_in_post, [], [] try: + # Regex for typical Mega decryption keys (43 or 22 chars, alphanumeric + hyphen/underscore) + mega_key_pattern = re.compile(r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b') unique_links_data = {} for match in link_pattern.finditer(post_content_html): 
link_url = match.group(1).strip() @@ -1226,10 +1231,29 @@ class PostProcessorWorker: links_emitted_count = 0 scraped_platforms = {'kemono', 'coomer', 'patreon'} for link_url, link_text in unique_links_data.items(): - platform = get_link_platform(link_url) - if platform not in scraped_platforms: - self._emit_signal('external_link', post_title, link_text, link_url, platform) - links_emitted_count +=1 + platform = get_link_platform(link_url) + decryption_key_found = "" + if platform == 'mega': + # 1. Check if key is in the URL fragment + parsed_mega_url = urlparse(link_url) + if parsed_mega_url.fragment: + potential_key_from_fragment = parsed_mega_url.fragment.split('!')[-1] # Handle cases like #!key or #key + if mega_key_pattern.fullmatch(potential_key_from_fragment): + decryption_key_found = potential_key_from_fragment + + # 2. If not in fragment, search in link text + if not decryption_key_found and link_text: + key_match_in_text = mega_key_pattern.search(link_text) + if key_match_in_text: + decryption_key_found = key_match_in_text.group(1) + # 3. 
If still not found, search the whole post content (if extracting links only, as it's more critical) + if not decryption_key_found and self.extract_links_only and post_content_html: + key_match_in_content = mega_key_pattern.search(strip_html_tags(post_content_html)) # Search cleaned content + if key_match_in_content: + decryption_key_found = key_match_in_content.group(1) + if platform not in scraped_platforms: + self._emit_signal('external_link', post_title, link_text, link_url, platform, decryption_key_found or "") + links_emitted_count +=1 if links_emitted_count > 0: self.logger(f" 🔗 Found {links_emitted_count} potential external link(s) in post content.") except Exception as e: self.logger(f"⚠️ Error parsing post content for links: {e}\n{traceback.format_exc(limit=2)}") if self.extract_links_only: @@ -1479,8 +1503,8 @@ class DownloadThread(QThread): progress_signal = pyqtSignal(str) # Already QObject, no need to change add_character_prompt_signal = pyqtSignal(str) file_download_status_signal = pyqtSignal(bool) - finished_signal = pyqtSignal(int, int, bool, list) - external_link_signal = pyqtSignal(str, str, str, str) + finished_signal = pyqtSignal(int, int, bool, list) # total_downloaded, total_skipped, cancelled_by_user, kept_original_names_list + external_link_signal = pyqtSignal(str, str, str, str, str) # post_title, link_text, link_url, platform, decryption_key file_progress_signal = pyqtSignal(str, object) retryable_file_failed_signal = pyqtSignal(list) # New: list of retry_details dicts missed_character_post_signal = pyqtSignal(str, str) # New: post_title, reason diff --git a/main.py b/main.py index d50f950..28ad2f1 100644 --- a/main.py +++ b/main.py @@ -71,7 +71,7 @@ except ImportError as e: class _MockPostProcessorSignals(QObject): progress_signal = pyqtSignal(str) file_download_status_signal = pyqtSignal(bool) - external_link_signal = pyqtSignal(str, str, str, str) + external_link_signal = pyqtSignal(str, str, str, str, str) # Added decryption_key
file_progress_signal = pyqtSignal(str, object) missed_character_post_signal = pyqtSignal(str, str) def __init__(self, parent=None): @@ -232,6 +232,86 @@ class ConfirmAddAllDialog(QDialog): return CONFIRM_ADD_ALL_SKIP_ADDING return self.user_choice +class CookieHelpDialog(QDialog): + """A dialog to explain how to get a cookies.txt file.""" + # Define constants for user choices + CHOICE_PROCEED_WITHOUT_COOKIES = 1 + CHOICE_CANCEL_DOWNLOAD = 2 + CHOICE_OK_INFO_ONLY = 3 + + def __init__(self, parent=None, offer_download_without_option=False): + super().__init__(parent) + self.setWindowTitle("Cookie File Instructions") + self.setModal(True) + self.offer_download_without_option = offer_download_without_option + self.user_choice = None # Will be set by button actions + + # Main layout + main_layout = QVBoxLayout(self) + + instruction_text = """ +
To use cookies, you typically need a cookies.txt file from your browser.
+How to get cookies.txt:
+cookies.txt file to your computer.cookies.txt file you just saved.Alternatively, some extensions might allow you to copy the cookie string directly. If so, you can paste it into the text field instead of browsing for a file.
+ """ + info_label = QLabel(instruction_text) + info_label.setTextFormat(Qt.RichText) + info_label.setOpenExternalLinks(True) + info_label.setWordWrap(True) + main_layout.addWidget(info_label) + + # Button layout + button_layout = QHBoxLayout() + if self.offer_download_without_option: + button_layout.addStretch(1) # Push both buttons to the right + + self.download_without_button = QPushButton("Download without Cookies") + self.download_without_button.clicked.connect(self._proceed_without_cookies) + button_layout.addWidget(self.download_without_button) + + self.cancel_button = QPushButton("Cancel Download") + self.cancel_button.clicked.connect(self._cancel_download) + button_layout.addWidget(self.cancel_button) + else: + button_layout.addStretch(1) # Push OK to the right + self.ok_button = QPushButton("OK") + self.ok_button.clicked.connect(self._ok_info_only) + button_layout.addWidget(self.ok_button) + + main_layout.addLayout(button_layout) + + if parent and hasattr(parent, 'get_dark_theme'): + self.setStyleSheet(parent.get_dark_theme()) + self.setMinimumWidth(500) + + def _proceed_without_cookies(self): + self.user_choice = self.CHOICE_PROCEED_WITHOUT_COOKIES + self.accept() # or self.done(QDialog.Accepted) + + def _cancel_download(self): + self.user_choice = self.CHOICE_CANCEL_DOWNLOAD + self.reject() # or self.done(QDialog.Rejected) + + def _ok_info_only(self): + self.user_choice = self.CHOICE_OK_INFO_ONLY + self.accept() # or self.done(QDialog.Accepted) + class KnownNamesFilterDialog(QDialog): """A dialog to select names from Known.txt to add to the filter input.""" def __init__(self, known_names_list, parent=None): @@ -414,15 +494,16 @@ class FavoriteArtistsDialog(QDialog): self.cookies_config['app_base_dir'], self._logger ) - if self.cookies_config['use_cookie'] and not cookies_dict: self.status_label.setText("Error: Cookies enabled but could not be loaded. 
Cannot fetch favorites.") self._show_content_elements(False) - self._logger("Error: Cookies enabled but could not be loaded.") - QMessageBox.warning(self, "Cookie Error", "Cookies are enabled, but no valid cookies could be loaded. Please check your cookie settings or file.") + self._logger("Error: Cookies enabled but could not be loaded. Showing help dialog.") + + cookie_help_dialog = CookieHelpDialog(self) + cookie_help_dialog.exec_() + self.download_button.setEnabled(False) return - try: headers = {'User-Agent': 'Mozilla/5.0'} response = requests.get(fav_url, headers=headers, cookies=cookies_dict, timeout=20) @@ -549,7 +630,7 @@ class FavoritePostsFetcherThread(QThread): ) if self.cookies_config['use_cookie'] and not cookies_dict: - self.finished.emit([], "Error: Cookies enabled but could not be loaded.") + self.finished.emit([], "COOKIES_REQUIRED_BUT_NOT_FOUND") return try: @@ -734,12 +815,18 @@ class FavoritePostsDialog(QDialog): def _on_fetch_completed(self, fetched_posts_list, error_msg): if error_msg: - self.status_label.setText(error_msg) - self._logger(error_msg) # Log to main app log - QMessageBox.critical(self, "Fetch Error", error_msg) - # Keep download button disabled or handle as appropriate + if error_msg == "COOKIES_REQUIRED_BUT_NOT_FOUND": + self.status_label.setText("Error: Cookies are required for favorite posts but could not be loaded.") + self._logger("Error: Cookies required for favorite posts but not found. 
Showing help dialog.") + cookie_help_dialog = CookieHelpDialog(self) + cookie_help_dialog.exec_() + self.download_button.setEnabled(False) # Ensure it's disabled + else: + self.status_label.setText(error_msg) + self._logger(error_msg) # Log to main app log + QMessageBox.critical(self, "Fetch Error", error_msg) + self.download_button.setEnabled(False) return - self.progress_bar.setVisible(False) self.all_fetched_posts = fetched_posts_list self._populate_post_list_widget() # This will now group and display @@ -1459,8 +1546,8 @@ class DownloaderApp(QWidget): log_signal = pyqtSignal(str) add_character_prompt_signal = pyqtSignal(str) overall_progress_signal = pyqtSignal(int, int) - finished_signal = pyqtSignal(int, int, bool, list) - external_link_signal = pyqtSignal(str, str, str, str) + finished_signal = pyqtSignal(int, int, bool, list) # total_downloaded, total_skipped, cancelled_by_user, kept_original_names_list + external_link_signal = pyqtSignal(str, str, str, str, str) # post_title, link_text, link_url, platform, decryption_key file_progress_signal = pyqtSignal(str, object) @@ -2648,8 +2735,8 @@ class DownloaderApp(QWidget): multi_thread_active = fetcher_active or pool_has_active_tasks return single_thread_active or multi_thread_active - def handle_external_link_signal(self, post_title, link_text, link_url, platform): - link_data = (post_title, link_text, link_url, platform) + def handle_external_link_signal(self, post_title, link_text, link_url, platform, decryption_key): + link_data = (post_title, link_text, link_url, platform, decryption_key) self.external_link_queue.append(link_data) if self.radio_only_links and self.radio_only_links.isChecked(): self.extracted_links_cache.append(link_data) @@ -2682,7 +2769,7 @@ class DownloaderApp(QWidget): def _display_and_schedule_next(self, link_data): - post_title, link_text, link_url, platform = link_data + post_title, link_text, link_url, platform, decryption_key = link_data is_only_links_mode = self.radio_only_links 
and self.radio_only_links.isChecked() max_link_text_len = 35 @@ -2690,6 +2777,9 @@ class DownloaderApp(QWidget): formatted_link_info = f"{display_text} - {link_url} - {platform}" separator = "-" * 45 + if decryption_key: + formatted_link_info += f" (Decryption Key: {decryption_key})" + if is_only_links_mode: if post_title != self._current_link_post_title: self.log_signal.emit(HTML_PREFIX + "