From fdcb81f1a01208b3ab390a99916ebc8e30226f66 Mon Sep 17 00:00:00 2001 From: Felipe <41008398+StrawberryMaster@users.noreply.github.com> Date: Tue, 31 Dec 2024 16:50:50 +0000 Subject: [PATCH] Refactoring --- lib/wayback_machine_downloader/to_regex.rb | 116 +++++++++++++-------- 1 file changed, 71 insertions(+), 45 deletions(-) diff --git a/lib/wayback_machine_downloader/to_regex.rb b/lib/wayback_machine_downloader/to_regex.rb index d9f0a67..ce25499 100644 --- a/lib/wayback_machine_downloader/to_regex.rb +++ b/lib/wayback_machine_downloader/to_regex.rb @@ -1,17 +1,25 @@ +# frozen_string_literal: true + module ToRegex module StringMixin + INLINE_OPTIONS = /[imxnesu]*/i.freeze + REGEXP_DELIMITERS = { + '%r{' => '}'.freeze, + '/' => '/'.freeze + }.freeze + + REGEX_FLAGS = { + ignore_case: Regexp::IGNORECASE, + multiline: Regexp::MULTILINE, + extended: Regexp::EXTENDED + }.freeze + class << self def literal?(str) - REGEXP_DELIMITERS.none? { |s, e| str.start_with?(s) and str =~ /#{e}#{INLINE_OPTIONS}\z/ } + REGEXP_DELIMITERS.none? { |start, ending| str.start_with?(start) && str.match?(/#{ending}#{INLINE_OPTIONS}\z/) } end end - INLINE_OPTIONS = /[imxnesu]*/ - REGEXP_DELIMITERS = { - '%r{' => '}', - '/' => '/', - } - # Get a regex back # # Without :literal or :detect, `"foo".to_regex` will return nil. @@ -24,58 +32,76 @@ module ToRegex # @option options [true,false] :extended /foo/x # @option options [true,false] :lang /foo/[nesu] def to_regex(options = {}) - if args = as_regexp(options) - ::Regexp.new(*args) - end + args = as_regexp(options) + args ? Regexp.new(*args) : nil end - # Return arguments that can be passed to `Regexp.new` # @see to_regexp def as_regexp(options = {}) - unless options.is_a?(::Hash) - raise ::ArgumentError, "[to_regexp] Options must be a Hash" - end + raise ArgumentError, '[to_regexp] Options must be a Hash' unless options.is_a?(Hash) + str = self + return if options[:detect] && str.empty? - return if options[:detect] and str == '' - - if options[:literal] or (options[:detect] and ToRegexp::String.literal?(str)) - content = ::Regexp.escape str - elsif delim_set = REGEXP_DELIMITERS.detect { |k, _| str.start_with?(k) } - delim_start, delim_end = delim_set - /\A#{delim_start}(.*)#{delim_end}(#{INLINE_OPTIONS})\z/u =~ str - content = $1 - inline_options = $2 - return unless content.is_a?(::String) - content.gsub! '\\/', '/' - if inline_options - options[:ignore_case] = true if inline_options.include?('i') - options[:multiline] = true if inline_options.include?('m') - options[:extended] = true if inline_options.include?('x') - # 'n', 'N' = none, 'e', 'E' = EUC, 's', 'S' = SJIS, 'u', 'U' = UTF-8 - options[:lang] = inline_options.scan(/[nesu]/i).join.downcase - end + if should_treat_as_literal?(str, options) + content = Regexp.escape(str) + elsif (delim_set = extract_delimiters(str)) + content, options = parse_regexp_string(str, delim_set, options) + return unless content else return end - ignore_case = options[:ignore_case] ? ::Regexp::IGNORECASE : 0 - multiline = options[:multiline] ? ::Regexp::MULTILINE : 0 - extended = options[:extended] ? ::Regexp::EXTENDED : 0 - lang = options[:lang] || '' - if ::RUBY_VERSION > '1.9' and lang.include?('u') - lang = lang.delete 'u' - end + build_regexp_args(content, options) + end - if lang.empty? - [ content, (ignore_case|multiline|extended) ] - else - [ content, (ignore_case|multiline|extended), lang ] - end + private + + def should_treat_as_literal?(str, options) + options[:literal] || (options[:detect] && ToRegex::StringMixin.literal?(str)) + end + + def extract_delimiters(str) + REGEXP_DELIMITERS.find { |start, _| str.start_with?(start) } + end + + def parse_regexp_string(str, delim_set, options) + start_delim, end_delim = delim_set + match = /\A#{start_delim}(.*)#{end_delim}(#{INLINE_OPTIONS})\z/u.match(str) + return unless match + + content = match[1].gsub('\\/', '/') + parse_inline_options(match[2], options) + [content, options] + end + + def parse_inline_options(inline_options, options) + return unless inline_options + options[:ignore_case] = true if inline_options.include?('i') + options[:multiline] = true if inline_options.include?('m') + options[:extended] = true if inline_options.include?('x') + # 'n', 'N' = none, 'e', 'E' = EUC, 's', 'S' = SJIS, 'u', 'U' = UTF-8 + options[:lang] = inline_options.scan(/[nesu]/i).join.downcase + end + + def build_regexp_args(content, options) + flags = calculate_flags(options) + lang = normalize_lang_option(options[:lang]) + + lang.empty? ? [content, flags] : [content, flags, lang] + end + + def calculate_flags(options) + REGEX_FLAGS.sum { |key, value| options[key] ? value : 0 } + end + + def normalize_lang_option(lang) + return '' unless lang + RUBY_VERSION >= '1.9' ? lang.delete('u') : lang end end end class String include ToRegex::StringMixin -end +end \ No newline at end of file