mirror of
https://github.com/Yuvi9587/Kemono-Downloader.git
synced 2025-12-29 16:14:44 +00:00
Commit
This commit is contained in:
@@ -154,10 +154,7 @@ def clean_filename(name):
|
||||
def strip_html_tags(html_text):
    """Return the plain text of an HTML fragment with whitespace normalized.

    Args:
        html_text: HTML markup to clean. Non-string values are converted
            with ``str()``; any falsy value (``None``, ``""``, ``0``)
            yields an empty string.

    Returns:
        The text with every ``<...>`` tag replaced by a space, HTML
        entities decoded, runs of whitespace collapsed to a single space,
        and leading/trailing whitespace removed.
    """
    if not html_text:
        return ""
    # Remove tags BEFORE decoding entities. Decoding first would turn
    # escaped literal text such as "1 &lt; 2 and 3 &gt; 2" into real angle
    # brackets, and the tag regex would then delete "< 2 and 3 >" as if it
    # were markup.
    text_without_tags = re.sub(r'<[^>]+>', ' ', str(html_text))
    decoded_text = html.unescape(text_without_tags)
    # Collapse whitespace last so characters produced by decoding
    # (e.g. the non-breaking space from "&nbsp;") are normalized as well.
    return re.sub(r'\s+', ' ', decoded_text).strip()
|
||||
def extract_folder_name_from_title(title, unwanted_keywords):
|
||||
@@ -1214,7 +1211,6 @@ class PostProcessorWorker:
|
||||
if (self.show_external_links or self.extract_links_only) and post_content_html: # type: ignore
|
||||
if self._check_pause(f"External link extraction for post {post_id}"): return 0, num_potential_files_in_post, [], []
|
||||
try:
|
||||
# Regex for typical Mega decryption keys (43 or 22 chars, alphanumeric + hyphen/underscore)
|
||||
mega_key_pattern = re.compile(r'\b([a-zA-Z0-9_-]{43}|[a-zA-Z0-9_-]{22})\b')
|
||||
unique_links_data = {}
|
||||
for match in link_pattern.finditer(post_content_html):
|
||||
@@ -1234,19 +1230,16 @@ class PostProcessorWorker:
|
||||
platform = get_link_platform(link_url)
|
||||
decryption_key_found = ""
|
||||
if platform == 'mega':
|
||||
# 1. Check if key is in the URL fragment
|
||||
parsed_mega_url = urlparse(link_url)
|
||||
if parsed_mega_url.fragment:
|
||||
potential_key_from_fragment = parsed_mega_url.fragment.split('!')[-1] # Handle cases like #!key or #key
|
||||
if mega_key_pattern.fullmatch(potential_key_from_fragment):
|
||||
decryption_key_found = potential_key_from_fragment
|
||||
|
||||
# 2. If not in fragment, search in link text
|
||||
if not decryption_key_found and link_text:
|
||||
key_match_in_text = mega_key_pattern.search(link_text)
|
||||
if key_match_in_text:
|
||||
decryption_key_found = key_match_in_text.group(1)
|
||||
# 3. If still not found, search the whole post content (if extracting links only, as it's more critical)
|
||||
if not decryption_key_found and self.extract_links_only and post_content_html:
|
||||
key_match_in_content = mega_key_pattern.search(strip_html_tags(post_content_html)) # Search cleaned content
|
||||
if key_match_in_content:
|
||||
|
||||
Reference in New Issue
Block a user