diff --git a/lib/wayback_machine_downloader/tidy_bytes.rb b/lib/wayback_machine_downloader/tidy_bytes.rb
index dbc97af..48b5d35 100644
--- a/lib/wayback_machine_downloader/tidy_bytes.rb
+++ b/lib/wayback_machine_downloader/tidy_bytes.rb
@@ -2,24 +2,56 @@
 
 # essentially, this is for converting a string with a potentially
 # broken or unknown encoding into a valid UTF-8 string
+# @todo: consider using charlock_holmes for this in the future
 module TidyBytes
+  UNICODE_REPLACEMENT_CHARACTER = "�".freeze
+
+  # encodings tried in order by tidy_bytes; the first clean decode wins
+  COMMON_ENCODINGS = [
+    Encoding::UTF_8,
+    Encoding::Windows_1251, # Cyrillic/Russian legacy
+    Encoding::GB18030,      # Simplified Chinese
+    Encoding::Shift_JIS,    # Japanese
+    Encoding::EUC_KR,       # Korean
+    Encoding::Windows_1252, # Western European; before Latin1, which never fails
+    Encoding::ISO_8859_1    # Latin1: maps every byte, so it must come last
+  ].freeze
+
+  # true if the raw bytes contain a null, i.e. the string looks binary
+  def binary_data?
+    b.include?("\x00".b) # compare as bytes so UTF-16/32 strings cannot raise
+  end
+
+  # attempts to return a valid UTF-8 version of the string
+  #
+  # already-valid UTF-8 is returned unchanged and strings containing a null
+  # byte are returned as a BINARY copy. otherwise the bytes are reinterpreted
+  # as each of COMMON_ENCODINGS in turn, ignoring the declared encoding, and
+  # the first one that converts to UTF-8 without any loss is used.
+  #
+  # @return [String] self, or a new UTF-8 (or BINARY) string
   def tidy_bytes
-    # return if the string is already valid UTF-8
-    return self if self.valid_encoding? && self.encoding == Encoding::UTF_8
+    return self if self.encoding == Encoding::UTF_8 && self.valid_encoding?
+    return self.dup.force_encoding("BINARY") if binary_data?
 
-    # create a mutable copy so we don't modify the original string
     str = self.dup
-
-    # attempt to encode to UTF-8
-    begin
-      return str.encode(Encoding::UTF-8)
-    rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
+    COMMON_ENCODINGS.each do |enc|
+      str.force_encoding(enc)
+      # encode to the same encoding does not validate, so malformed bytes
+      # (e.g. broken UTF-8) have to be rejected explicitly
+      next unless str.valid_encoding?
+      begin
+        # strict conversion: raises instead of substituting, so a U+FFFD
+        # that is genuinely part of the text no longer disqualifies it
+        return str.encode(Encoding::UTF_8)
+      rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
+        # try next encoding
+      end
     end
 
-    # if it failed, force the encoding to ISO-8859-1, transcode the
-    # string to UTF-8, and use replacement options for any characters
-    # that might still be problematic
-    str.force_encoding(Encoding::ISO_8859_1).encode(Encoding::UTF_8, invalid: :replace, undef: :replace, replace: '�')
+    # no candidate decoded cleanly: keep the bytes as UTF-8 and put the
+    # replacement character in place of every malformed sequence
+    str.force_encoding(Encoding::UTF_8).scrub(UNICODE_REPLACEMENT_CHARACTER)
   end
 
   def tidy_bytes!
@@ -43,4 +75,5 @@ end
 
+# mix TidyBytes into String so every string responds to tidy_bytes and tidy_bytes!
 class String
   include TidyBytes
 end