diff --git a/downloader_utils.py b/downloader_utils.py index 7552d08..703b947 100644 --- a/downloader_utils.py +++ b/downloader_utils.py @@ -153,10 +153,13 @@ def clean_filename(name): return final_name if final_name else "untitled_file" def strip_html_tags(html_text): if not html_text: return "" - text = html.unescape(html_text) - clean_pattern = re.compile('<.*?>') - cleaned_text = re.sub(clean_pattern, '', text) - return cleaned_text.strip() + text = html.unescape(str(html_text)) # Ensure input is a string + # Replace HTML tags with a single space + text_after_tag_removal = re.sub(r'<[^>]+>', ' ', text) + # Replace multiple whitespace characters (including newlines, tabs, etc. that are now spaces) + # with a single space. Also, strip leading/trailing whitespace from the final result. + cleaned_text = re.sub(r'\s+', ' ', text_after_tag_removal).strip() + return cleaned_text def extract_folder_name_from_title(title, unwanted_keywords): if not title: return 'Uncategorized' title_lower = title.lower() @@ -558,7 +561,7 @@ def get_link_platform(url): class PostProcessorSignals(QObject): progress_signal = pyqtSignal(str) file_download_status_signal = pyqtSignal(bool) - external_link_signal = pyqtSignal(str, str, str, str) + external_link_signal = pyqtSignal(str, str, str, str, str) # Added decryption_key (str) file_progress_signal = pyqtSignal(str, object) missed_character_post_signal = pyqtSignal(str, str) # New: post_title, reason class PostProcessorWorker: @@ -1211,6 +1214,8 @@ class PostProcessorWorker: if (self.show_external_links or self.extract_links_only) and post_content_html: # type: ignore if self._check_pause(f"External link extraction for post {post_id}"): return 0, num_potential_files_in_post, [], [] try: + # Regex for typical Mega decryption keys (43 or 22 chars, alphanumeric + hyphen/underscore) + mega_key_pattern = re.compile(r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b') unique_links_data = {} for match in link_pattern.finditer(post_content_html): 
link_url = match.group(1).strip() @@ -1226,10 +1231,29 @@ class PostProcessorWorker: links_emitted_count = 0 scraped_platforms = {'kemono', 'coomer', 'patreon'} for link_url, link_text in unique_links_data.items(): - platform = get_link_platform(link_url) - if platform not in scraped_platforms: - self._emit_signal('external_link', post_title, link_text, link_url, platform) - links_emitted_count +=1 + platform = get_link_platform(link_url) + decryption_key_found = "" + if platform == 'mega': + # 1. Check if key is in the URL fragment + parsed_mega_url = urlparse(link_url) + if parsed_mega_url.fragment: + potential_key_from_fragment = parsed_mega_url.fragment.split('!')[-1] # Handle cases like #!key or #key + if mega_key_pattern.fullmatch(potential_key_from_fragment): + decryption_key_found = potential_key_from_fragment + + # 2. If not in fragment, search in link text + if not decryption_key_found and link_text: + key_match_in_text = mega_key_pattern.search(link_text) + if key_match_in_text: + decryption_key_found = key_match_in_text.group(1) + # 3. 
If still not found, search the whole post content (if extracting links only, as it's more critical) + if not decryption_key_found and self.extract_links_only and post_content_html: + key_match_in_content = mega_key_pattern.search(strip_html_tags(post_content_html)) # Search cleaned content + if key_match_in_content: + decryption_key_found = key_match_in_content.group(1) + if platform not in scraped_platforms: + self._emit_signal('external_link', post_title, link_text, link_url, platform, decryption_key_found or "") + links_emitted_count +=1 if links_emitted_count > 0: self.logger(f" 🔗 Found {links_emitted_count} potential external link(s) in post content.") except Exception as e: self.logger(f"âš ī¸ Error parsing post content for links: {e}\n{traceback.format_exc(limit=2)}") if self.extract_links_only: @@ -1479,8 +1503,8 @@ class DownloadThread(QThread): progress_signal = pyqtSignal(str) # Already QObject, no need to change add_character_prompt_signal = pyqtSignal(str) file_download_status_signal = pyqtSignal(bool) - finished_signal = pyqtSignal(int, int, bool, list) - external_link_signal = pyqtSignal(str, str, str, str) + finished_signal = pyqtSignal(int, int, bool, list) # total_downloaded, total_skipped, cancelled_by_user, kept_original_names_list + external_link_signal = pyqtSignal(str, str, str, str, str) # post_title, link_text, link_url, platform, decryption_key file_progress_signal = pyqtSignal(str, object) retryable_file_failed_signal = pyqtSignal(list) # New: list of retry_details dicts missed_character_post_signal = pyqtSignal(str, str) # New: post_title, reason diff --git a/main.py b/main.py index d50f950..28ad2f1 100644 --- a/main.py +++ b/main.py @@ -71,7 +71,7 @@ except ImportError as e: class _MockPostProcessorSignals(QObject): progress_signal = pyqtSignal(str) file_download_status_signal = pyqtSignal(bool) - external_link_signal = pyqtSignal(str, str, str, str) + external_link_signal = pyqtSignal(str, str, str, str, str) # Added decryption_key 
file_progress_signal = pyqtSignal(str, object) missed_character_post_signal = pyqtSignal(str, str) def __init__(self, parent=None): @@ -232,6 +232,86 @@ class ConfirmAddAllDialog(QDialog): return CONFIRM_ADD_ALL_SKIP_ADDING return self.user_choice +class CookieHelpDialog(QDialog): + """A dialog to explain how to get a cookies.txt file.""" + # Define constants for user choices + CHOICE_PROCEED_WITHOUT_COOKIES = 1 + CHOICE_CANCEL_DOWNLOAD = 2 + CHOICE_OK_INFO_ONLY = 3 + + def __init__(self, parent=None, offer_download_without_option=False): + super().__init__(parent) + self.setWindowTitle("Cookie File Instructions") + self.setModal(True) + self.offer_download_without_option = offer_download_without_option + self.user_choice = None # Will be set by button actions + + # Main layout + main_layout = QVBoxLayout(self) + + instruction_text = """ +

To use cookies, you typically need a cookies.txt file from your browser.

+

How to get cookies.txt:

+
    +
  1. Install the 'Get cookies.txt LOCALLY' extension for your Chrome-based browser: +
    Get cookies.txt LOCALLY on Chrome Web Store + +
  2. Go to the website (e.g., kemono.su or coomer.su) and log in if necessary. +
  3. Click the extension's icon in your browser toolbar. +
  4. Click an 'Export' button (e.g., "Export As", "Export cookies.txt" - the exact wording might vary depending on the extension version). +
  5. Save the downloaded cookies.txt file to your computer. +
  6. In this application: + + +
+

Alternatively, some extensions might allow you to copy the cookie string directly. If so, you can paste it into the text field instead of browsing for a file.

+ """ + info_label = QLabel(instruction_text) + info_label.setTextFormat(Qt.RichText) + info_label.setOpenExternalLinks(True) + info_label.setWordWrap(True) + main_layout.addWidget(info_label) + + # Button layout + button_layout = QHBoxLayout() + if self.offer_download_without_option: + button_layout.addStretch(1) # Push both buttons to the right + + self.download_without_button = QPushButton("Download without Cookies") + self.download_without_button.clicked.connect(self._proceed_without_cookies) + button_layout.addWidget(self.download_without_button) + + self.cancel_button = QPushButton("Cancel Download") + self.cancel_button.clicked.connect(self._cancel_download) + button_layout.addWidget(self.cancel_button) + else: + button_layout.addStretch(1) # Push OK to the right + self.ok_button = QPushButton("OK") + self.ok_button.clicked.connect(self._ok_info_only) + button_layout.addWidget(self.ok_button) + + main_layout.addLayout(button_layout) + + if parent and hasattr(parent, 'get_dark_theme'): + self.setStyleSheet(parent.get_dark_theme()) + self.setMinimumWidth(500) + + def _proceed_without_cookies(self): + self.user_choice = self.CHOICE_PROCEED_WITHOUT_COOKIES + self.accept() # or self.done(QDialog.Accepted) + + def _cancel_download(self): + self.user_choice = self.CHOICE_CANCEL_DOWNLOAD + self.reject() # or self.done(QDialog.Rejected) + + def _ok_info_only(self): + self.user_choice = self.CHOICE_OK_INFO_ONLY + self.accept() # or self.done(QDialog.Accepted) + class KnownNamesFilterDialog(QDialog): """A dialog to select names from Known.txt to add to the filter input.""" def __init__(self, known_names_list, parent=None): @@ -414,15 +494,16 @@ class FavoriteArtistsDialog(QDialog): self.cookies_config['app_base_dir'], self._logger ) - if self.cookies_config['use_cookie'] and not cookies_dict: self.status_label.setText("Error: Cookies enabled but could not be loaded. 
Cannot fetch favorites.") self._show_content_elements(False) - self._logger("Error: Cookies enabled but could not be loaded.") - QMessageBox.warning(self, "Cookie Error", "Cookies are enabled, but no valid cookies could be loaded. Please check your cookie settings or file.") + self._logger("Error: Cookies enabled but could not be loaded. Showing help dialog.") + + cookie_help_dialog = CookieHelpDialog(self) + cookie_help_dialog.exec_() + self.download_button.setEnabled(False) return - try: headers = {'User-Agent': 'Mozilla/5.0'} response = requests.get(fav_url, headers=headers, cookies=cookies_dict, timeout=20) @@ -549,7 +630,7 @@ class FavoritePostsFetcherThread(QThread): ) if self.cookies_config['use_cookie'] and not cookies_dict: - self.finished.emit([], "Error: Cookies enabled but could not be loaded.") + self.finished.emit([], "COOKIES_REQUIRED_BUT_NOT_FOUND") return try: @@ -734,12 +815,18 @@ class FavoritePostsDialog(QDialog): def _on_fetch_completed(self, fetched_posts_list, error_msg): if error_msg: - self.status_label.setText(error_msg) - self._logger(error_msg) # Log to main app log - QMessageBox.critical(self, "Fetch Error", error_msg) - # Keep download button disabled or handle as appropriate + if error_msg == "COOKIES_REQUIRED_BUT_NOT_FOUND": + self.status_label.setText("Error: Cookies are required for favorite posts but could not be loaded.") + self._logger("Error: Cookies required for favorite posts but not found. 
Showing help dialog.") + cookie_help_dialog = CookieHelpDialog(self) + cookie_help_dialog.exec_() + self.download_button.setEnabled(False) # Ensure it's disabled + else: + self.status_label.setText(error_msg) + self._logger(error_msg) # Log to main app log + QMessageBox.critical(self, "Fetch Error", error_msg) + self.download_button.setEnabled(False) return - self.progress_bar.setVisible(False) self.all_fetched_posts = fetched_posts_list self._populate_post_list_widget() # This will now group and display @@ -1459,8 +1546,8 @@ class DownloaderApp(QWidget): log_signal = pyqtSignal(str) add_character_prompt_signal = pyqtSignal(str) overall_progress_signal = pyqtSignal(int, int) - finished_signal = pyqtSignal(int, int, bool, list) - external_link_signal = pyqtSignal(str, str, str, str) + finished_signal = pyqtSignal(int, int, bool, list) # total_downloaded, total_skipped, cancelled_by_user, kept_original_names_list + external_link_signal = pyqtSignal(str, str, str, str, str) # post_title, link_text, link_url, platform, decryption_key file_progress_signal = pyqtSignal(str, object) @@ -2648,8 +2735,8 @@ class DownloaderApp(QWidget): multi_thread_active = fetcher_active or pool_has_active_tasks return single_thread_active or multi_thread_active - def handle_external_link_signal(self, post_title, link_text, link_url, platform): - link_data = (post_title, link_text, link_url, platform) + def handle_external_link_signal(self, post_title, link_text, link_url, platform, decryption_key): + link_data = (post_title, link_text, link_url, platform, decryption_key) self.external_link_queue.append(link_data) if self.radio_only_links and self.radio_only_links.isChecked(): self.extracted_links_cache.append(link_data) @@ -2682,7 +2769,7 @@ class DownloaderApp(QWidget): def _display_and_schedule_next(self, link_data): - post_title, link_text, link_url, platform = link_data + post_title, link_text, link_url, platform, decryption_key = link_data is_only_links_mode = self.radio_only_links 
and self.radio_only_links.isChecked() max_link_text_len = 35 @@ -2690,6 +2777,9 @@ class DownloaderApp(QWidget): formatted_link_info = f"{display_text} - {link_url} - {platform}" separator = "-" * 45 + if decryption_key: + formatted_link_info += f" (Decryption Key: {decryption_key})" + if is_only_links_mode: if post_title != self._current_link_post_title: self.log_signal.emit(HTML_PREFIX + "
" + separator + "
") @@ -2916,13 +3006,15 @@ class DownloaderApp(QWidget): current_title_for_display = None separator = "-" * 45 - for post_title, link_text, link_url, platform in self.extracted_links_cache: + for post_title, link_text, link_url, platform, decryption_key in self.extracted_links_cache: matches_search = ( not search_term or search_term in link_text.lower() or search_term in link_url.lower() or - search_term in platform.lower() + search_term in platform.lower() or + (decryption_key and search_term in decryption_key.lower()) ) + if matches_search: if post_title != current_title_for_display: self.main_log_output.insertHtml("
" + separator + "
") @@ -2933,6 +3025,8 @@ class DownloaderApp(QWidget): max_link_text_len = 35 display_text = link_text[:max_link_text_len].strip() + "..." if len(link_text) > max_link_text_len else link_text formatted_link_info = f"{display_text} - {link_url} - {platform}" + if decryption_key: + formatted_link_info += f" (Decryption Key: {decryption_key})" self.main_log_output.append(formatted_link_info) if self.main_log_output.toPlainText().strip(): @@ -2956,13 +3050,16 @@ class DownloaderApp(QWidget): with open(filepath, 'w', encoding='utf-8') as f: current_title_for_export = None separator = "-" * 60 + "\n" - for post_title, link_text, link_url, platform in self.extracted_links_cache: + for post_title, link_text, link_url, platform, decryption_key in self.extracted_links_cache: if post_title != current_title_for_export: if current_title_for_export is not None: f.write("\n" + separator + "\n") f.write(f"Post Title: {post_title}\n\n") current_title_for_export = post_title - f.write(f" {link_text} - {link_url} - {platform}\n") + line_to_write = f" {link_text} - {link_url} - {platform}" + if decryption_key: + line_to_write += f" (Decryption Key: {decryption_key})" + f.write(line_to_write + "\n") self.log_signal.emit(f"✅ Links successfully exported to: {filepath}") QMessageBox.information(self, "Export Successful", f"Links exported to:\n{filepath}") except Exception as e: @@ -3665,7 +3762,35 @@ class DownloaderApp(QWidget): use_cookie_from_checkbox = self.use_cookie_checkbox.isChecked() if hasattr(self, 'use_cookie_checkbox') else False app_base_dir_for_cookies = os.path.dirname(self.config_file) # Directory of Known.txt cookie_text_from_input = self.cookie_text_input.text().strip() if hasattr(self, 'cookie_text_input') and use_cookie_from_checkbox else "" + + use_cookie_for_this_run = use_cookie_from_checkbox # Initialize with checkbox state selected_cookie_file_path_for_backend = self.selected_cookie_filepath if use_cookie_from_checkbox and self.selected_cookie_filepath else None 
+ + if use_cookie_from_checkbox and not direct_api_url: # Don't show for individual items in favorite queue if they fail this check + # Perform an early check for cookies if 'Use Cookie' is checked for the main UI interaction + # The actual cookies for download/API calls will be prepared by the backend. + # This is for proactive UI feedback. + temp_cookies_for_check = prepare_cookies_for_request( + use_cookie_for_this_run, # Use the potentially modified flag + cookie_text_from_input, + selected_cookie_file_path_for_backend, + app_base_dir_for_cookies, + lambda msg: self.log_signal.emit(f"[UI Cookie Check] {msg}") + ) + if temp_cookies_for_check is None: + cookie_dialog = CookieHelpDialog(self, offer_download_without_option=True) + dialog_exec_result = cookie_dialog.exec_() + + if cookie_dialog.user_choice == CookieHelpDialog.CHOICE_PROCEED_WITHOUT_COOKIES and dialog_exec_result == QDialog.Accepted: + self.log_signal.emit("â„šī¸ User chose to download without cookies for this session.") + use_cookie_for_this_run = False # Override for this run + elif cookie_dialog.user_choice == CookieHelpDialog.CHOICE_CANCEL_DOWNLOAD or dialog_exec_result == QDialog.Rejected: + self.log_signal.emit("❌ Download cancelled by user at cookie prompt.") + return False + else: # Any other case, including closing the dialog via 'X' button + self.log_signal.emit("âš ī¸ Cookie dialog closed or unexpected choice. 
Aborting download.") + return False + current_skip_words_scope = self.get_skip_words_scope() manga_mode_is_checked = self.manga_mode_checkbox.isChecked() if self.manga_mode_checkbox else False @@ -4072,7 +4197,7 @@ class DownloaderApp(QWidget): 'selected_cookie_file': selected_cookie_file_path_for_backend, # Pass selected cookie file 'manga_global_file_counter_ref': manga_global_file_counter_ref_for_thread, # Pass new counter 'app_base_dir': app_base_dir_for_cookies, # Pass app base dir - 'use_cookie': use_cookie_from_checkbox, # Pass cookie setting + 'use_cookie': use_cookie_for_this_run, # Pass the potentially modified cookie setting } args_template['override_output_dir'] = override_output_dir # Pass override dir in template @@ -5514,7 +5639,12 @@ class DownloaderApp(QWidget): if selected_artists: if len(selected_artists) > 1: display_names = ", ".join([artist['name'] for artist in selected_artists]) - self.link_input.setText(display_names) + # For multiple artists, we don't set the link_input as it's confusing. + # The queue will handle individual URLs. + # self.link_input.setText(display_names) # Avoid setting this + if self.link_input: # Clear it if it was showing a single URL before + self.link_input.clear() + self.link_input.setPlaceholderText(f"{len(selected_artists)} favorite artists selected for download queue.") self.log_signal.emit(f"â„šī¸ Multiple favorite artists selected. 
Displaying names: {display_names}") elif len(selected_artists) == 1: self.link_input.setText(selected_artists[0]['url']) # Show the single URL @@ -5545,6 +5675,20 @@ class DownloaderApp(QWidget): } global KNOWN_NAMES # Ensure we have access to the global + # Perform cookie check before showing the FavoritePostsDialog if cookies are enabled + if cookies_config['use_cookie']: + temp_cookies_for_check = prepare_cookies_for_request( + cookies_config['use_cookie'], + cookies_config['cookie_text'], + cookies_config['selected_cookie_file'], + cookies_config['app_base_dir'], + lambda msg: self.log_signal.emit(f"[FavPosts Cookie Check] {msg}") + ) + if temp_cookies_for_check is None: + cookie_help_dialog = CookieHelpDialog(self) + cookie_help_dialog.exec_() + return # Don't proceed to show FavoritePostsDialog if cookies are needed but not found + dialog = FavoritePostsDialog(self, cookies_config, KNOWN_NAMES) # Pass KNOWN_NAMES if dialog.exec_() == QDialog.Accepted: selected_posts = dialog.get_selected_posts()