module MaRuKu::Strings
These are string utilities.
Constants
- Abbreviation
Example:
*[HTML]: Hyper Text Markup Language
- AttributeDefinitionList
$1 = id $2 = attribute list
- Definition
Example:
^:blah blah ^: blah blah ^ : blah blah
- EMailAddress
- FootnoteText
- HeaderWithAttributes
- HeaderWithId
- IncompleteLink
- InlineAttributeList
- LinkRegex
This regex is taken from the BlueCloth sources. Link definitions are in the form: ^[id]: \n? url "optional title"
- MightBeTableHeader
If the line contains a pipe, it could be a table header.
- Sep
————-:
- TabSize
- TableSeparator
| ————-:| —————————— |
Public Instance Methods
# File lib/maruku/string_utils.rb, line 25
# Prefixes every line of +s+ with +n+ copies of +char+.
#
# s    - the text to indent; it is split on "\n".
# n    - how many times +char+ is repeated in front of each line (default 1).
# char - the prefix unit (default: one tab).
#
# Returns the indented lines joined back together with "\n".
def add_tabs(s,n=1,char="\t")
  prefix = char * n
  indented = s.split("\n").map do |line|
    prefix + line
  end
  indented.join("\n")
end
# File lib/maruku/string_utils.rb, line 179
# Debug helper: prints each element of +a+ on its own line, numbered from 1.
#
# a      - the collection to dump; each element is shown via #inspect.
# prefix - text printed at the start of every line (default: empty).
#
# Returns +a+.
def dbg_describe_ary(a, prefix='')
  a.each_with_index do |entry, idx|
    puts "#{prefix} (#{idx + 1})# #{entry.inspect}"
  end
end
# File lib/maruku/string_utils.rb, line 186
# Tells whether line +l+ ends with a forced line break.
#
# Markdown turns a line ending in two or more spaces into a hard break.
# The pattern therefore requires two trailing spaces; a single trailing
# space must not count.
#
# Returns the index of the match (truthy) or nil when there is none.
def force_linebreak?(l)
  l =~ /  $/
end
# File lib/maruku/input/type_detection.rb, line 36
# Classifies one line of Markdown source, returning a Symbol for its type.
#
# The checks run top to bottom and the first match wins, so their order
# matters. :text is both the cheap first check (the line starts with an
# ASCII letter) and the fallback when nothing else matches.
def line_md_type(l)
  if l =~ /^[a-zA-Z]/ then :text
  elsif number_of_leading_spaces(l)>=4 then :code
  elsif l =~ /^\s*$/ then :empty
  elsif l =~ FootnoteText then :footnote_text
  elsif (l =~ LinkRegex) || (l =~ IncompleteLink) then :ref_definition
  elsif l =~ Abbreviation then :abbreviation
  elsif l =~ Definition then :definition
  # E-mail addresses and URLs at the beginning of a line used to be
  # mistaken for raw_html, so they are claimed as :text first.
  elsif l =~ /^[ ]{0,3}#{EMailAddress}/ then :text
  elsif l =~ /^[ ]{0,3}<http:/ then :text
  # Raw HTML follows PHP Markdown Extra: at most three spaces before the tag.
  elsif l =~ %r{^\s*<\?} then :xml_instr
  elsif l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+} then :raw_html
  elsif l =~ %r{^[ ]?[ ]?[ ]?<\!\-\-} then :raw_html
  # Something is wrong with how lists are parsed: the 0-3 space variants
  # below are disabled in favour of the 0-1 space ones.
  #   :ulist if l =~ /^[ ]{0,3}([\*\-\+])\s+.*\w+/
  #   :olist if l =~ /^[ ]{0,3}\d+\..*\w+/
  elsif l =~ /^[ ]{0,1}([\*\-\+])\s+.*\w+/ then :ulist
  elsif l =~ /^[ ]{0,1}\d+\..*\w+/ then :olist
  elsif l =~ /^(=)+/ then :header1
  elsif l =~ /^([-\s])+$/ then :header2
  elsif l =~ /^(#)+\s*\S+/ then :header3
  # A rule is at least three asterisks, hyphens or underscores on a line,
  # with nothing else but whitespace.
  elsif l =~ /^(\s*\*\s*){3,1000}$/ then :hrule
  elsif l =~ /^(\s*-\s*){3,1000}$/ then :hrule
  elsif l =~ /^(\s*_\s*){3,1000}$/ then :hrule
  elsif l =~ /^>/ then :quote
  elsif l =~ /^@/ then :metadata
  # The two attribute-list checks were once guarded by `if @@new_meta_data?`;
  # that guard is disabled, so they always run.
  elsif l =~ AttributeDefinitionList then :ald
  elsif l =~ InlineAttributeList then :ial
  # A check for :equation_end (l =~ EquationEnd) is disabled here.
  else :text # nothing matched: it's just text
  end
end
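Keys are stripped and downcased, and spaces become underscores. Values are stripped; a missing value defaults to true, and "yes"/"true" and "no"/"false" become booleans. The key is returned as a string (parse_email_headers converts it to a symbol).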
# File lib/maruku/string_utils.rb, line 69
# Normalizes one header key/value pair.
#
# k - the raw key; it is stripped, downcased, and its spaces become "_".
# v - the raw value, or nil/false when the header has none.
#
# The value is stripped; a missing value defaults to true. The synonyms
# "yes"/"true" and "no"/"false" (any case) are turned into booleans.
#
# Returns the two-element array [key, value]; the key is still a String.
def normalize_key_and_value(k,v)
  value = v ? v.strip : true # no value defaults to true

  # Map the textual synonyms onto real booleans.
  case value.to_s.downcase
  when 'yes', 'true' then value = true
  when 'no', 'false' then value = false
  end

  key = k.strip.downcase.gsub(' ','_')
  [key, value]
end
Counts the number of leading '#' in the string
# File lib/maruku/string_utils.rb, line 137
# Counts the number of leading '#' characters in +s+.
#
# The earlier index loop stopped one character before the end of the
# string, so a string made only of hashes was undercounted ("#" gave 0,
# "###" gave 2). Matching the leading run with a regex counts every hash.
#
# Returns an Integer (0 when +s+ does not start with '#').
def num_leading_hashes(s)
  # \A#* always matches, possibly as an empty string, so the result is never nil.
  s[/\A#*/].size
end
Returns the number of leading spaces, considering that a tab counts as `TabSize` spaces.
# File lib/maruku/string_utils.rb, line 83
# Measures the indentation at the start of +s+.
#
# Each leading space counts as 1 and each leading tab as TabSize; counting
# stops at the first character that is neither.
#
# Returns the indentation width as an Integer.
def number_of_leading_spaces(s)
  width = 0
  s.each_char do |ch|
    case ch
    when ' ' then width += 1
    when "\t" then width += TabSize
    else break
    end
  end
  width
end
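This parses email-style headers at the start of a string. Returns a hash.
+hash[:data]+ is the message (the text that follows the headers, or the whole string when there are none).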
Keys are downcased, space becomes underscore, converted to symbols.
My key: true
becomes:
{:my_key => true}
# File lib/maruku/string_utils.rb, line 47
# Splits an optional block of "Key: value" header lines off the front of +s+.
#
# The header block must start at the very beginning of the string and be
# followed by a blank line. When there is no such block, the whole string
# is stored under :data and nothing else is set.
#
# Returns a Hash: :data holds the text after the headers, and every header
# is stored under its normalized key (see normalize_key_and_value)
# converted to a Symbol.
def parse_email_headers(s)
  result = {}
  found = /\A((\w[\w\s\_\-]+: .*\n)+)\s*\n/.match(s)

  unless found
    result[:data] = s
    return result
  end

  result[:data] = found.post_match
  found[1].split("\n").each do |line|
    # Split on the first ':' only, so values may contain ':' themselves.
    name, value = line.split(':', 2)
    name, value = normalize_key_and_value(name, value)
    result[name.to_sym] = value
  end
  result
end
Strips and downcases the string, changes spaces to "_", and removes any non-word character.
# File lib/maruku/string_utils.rb, line 152
# Turns arbitrary text into a reference id.
#
# The text is stripped and downcased, spaces become "_", and every
# remaining non-word character is removed.
#
# Returns the sanitized String.
def sanitize_ref_id(x)
  underscored = x.strip.downcase.tr(' ', '_')
  underscored.gsub(/\W/, '')
end
This returns the position of the first real char in a list item
For example:
'*Hello'     # => 1
'* Hello'    # => 2
' * Hello'   # => 3
' *   Hello' # => 5
'1.Hello'    # => 2
' 1.  Hello' # => 5
# File lib/maruku/string_utils.rb, line 108
# Returns the position of the first real character of a list item.
#
# s - a line that responds to #md_type with :ulist or :olist.
#
# For :ulist it skips leading whitespace, one marker character, then any
# whitespace after it. For :olist it skips leading whitespace, the digits,
# one character for the dot, then any whitespace after it.
#
# For any other type it reports the problem through tell_user and returns 0.
def spaces_before_first_char(s)
  # Moves +pos+ forward while the character at +pos+ matches +pattern+.
  advance = lambda do |pos, pattern|
    pos += 1 while s[pos,1] =~ pattern
    pos
  end

  case s.md_type
  when :ulist
    marker_at = advance.call(0, /\s/)
    # +1 steps over the marker (+, -, *)
    advance.call(marker_at + 1, /\s/)
  when :olist
    digits_at = advance.call(0, /\s/)
    dot_at = advance.call(digits_at, /\d/)
    # +1 steps over the dot
    advance.call(dot_at + 1, /\s/)
  else
    tell_user "BUG (my bad): '#{s}' is not a list"
    0
  end
end
# File lib/maruku/string_utils.rb, line 31
# Splits +s+ into lines.
#
# Every carriage return is removed first, then the text is split on "\n".
#
# Returns an Array of Strings without line terminators.
def split_lines(s)
  without_cr = s.delete("\r")
  without_cr.split("\n")
end
Strips initial and final hashes
# File lib/maruku/string_utils.rb, line 144
# Strips the leading and trailing hashes from a header line.
#
# The leading run is dropped using num_leading_hashes. Trailing '#' and
# whitespace characters are then trimmed from the right, never going past
# the first remaining character, and the result is stripped of
# surrounding whitespace.
#
# Returns the header text as a String.
def strip_hashes(s)
  body = s[num_leading_hashes(s), s.size]

  # Walk left from the end over '#' and whitespace; index 0 is always kept.
  last = body.size - 1
  last -= 1 while last > 0 && (body[last,1] =~ /(#|\s)/)

  body[0, last + 1].strip
end
Removes at most n columns of leading indentation (a space counts as one column, a tab as TabSize).
# File lib/maruku/string_utils.rb, line 163
# Removes up to +n+ columns of indentation from the start of +s+.
#
# A leading space uses up one column and a leading tab uses up TabSize.
# Removal stops as soon as the budget is used up or a character that is
# neither a space nor a tab is reached.
#
# Returns the remainder of the string.
def strip_indent(s, n)
  budget = n
  consumed = 0
  s.each_char do |ch|
    break unless budget > 0
    case ch
    when ' ' then budget -= 1
    when "\t" then budget -= TabSize
    else break
    end
    consumed += 1
  end
  s[consumed, s.size]
end
Removes the leading quote marker (">" and one optional whitespace character after it) from each line.
# File lib/maruku/string_utils.rb, line 158
# Removes the blockquote marker from the start of every line of +s+.
#
# The marker is a '>' optionally followed by one whitespace character.
# That optional character must not be the newline itself. Otherwise a bare
# ">" line would lose its line terminator and be merged with the next
# line, so the class below is "whitespace except \n".
#
# Returns a new String with the markers removed.
def unquote(s)
  s.gsub(/^>[^\S\n]?/,'')
end