Changeset 86
- Timestamp:
- 08/20/08 14:46:31 (3 months ago)
- Location:
- trunk
- Files:
-
- 57 added
- 8 removed
- 5 modified
-
. (modified) (1 prop)
-
LICENSE (deleted)
-
LICENSE.txt (deleted)
-
MANIFEST (deleted)
-
Rakefile (added)
-
bin/bluecloth (modified) (1 diff)
-
docs/CATALOG (deleted)
-
docs/makedocs.rb (deleted)
-
experiments/patch_test.rb (added)
-
experiments/warning_bug.rb (added)
-
install.rb (deleted)
-
lib/bluecloth.rb (modified) (39 diffs)
-
makedist.rb (deleted)
-
project.yml (added)
-
spec (added)
-
spec/bluecloth_spec.rb (added)
-
spec/bugfix_spec.rb (added)
-
spec/contributions_spec.rb (added)
-
spec/data (added)
-
spec/data/antsugar.txt (added)
-
spec/data/markdowntest (added)
-
spec/data/markdowntest/Amps and angle encoding.html (added)
-
spec/data/markdowntest/Amps and angle encoding.text (added)
-
spec/data/markdowntest/Auto links.html (added)
-
spec/data/markdowntest/Auto links.text (added)
-
spec/data/markdowntest/Backslash escapes.html (added)
-
spec/data/markdowntest/Backslash escapes.text (added)
-
spec/data/markdowntest/Blockquotes with code blocks.html (added)
-
spec/data/markdowntest/Blockquotes with code blocks.text (added)
-
spec/data/markdowntest/Hard-wrapped paragraphs with list-like lines.html (added)
-
spec/data/markdowntest/Hard-wrapped paragraphs with list-like lines.text (added)
-
spec/data/markdowntest/Horizontal rules.html (added)
-
spec/data/markdowntest/Horizontal rules.text (added)
-
spec/data/markdowntest/Inline HTML (Advanced).html (added)
-
spec/data/markdowntest/Inline HTML (Advanced).text (added)
-
spec/data/markdowntest/Inline HTML (Simple).html (added)
-
spec/data/markdowntest/Inline HTML (Simple).text (added)
-
spec/data/markdowntest/Inline HTML comments.html (added)
-
spec/data/markdowntest/Inline HTML comments.text (added)
-
spec/data/markdowntest/Links, inline style.html (added)
-
spec/data/markdowntest/Links, inline style.text (added)
-
spec/data/markdowntest/Links, reference style.html (added)
-
spec/data/markdowntest/Links, reference style.text (added)
-
spec/data/markdowntest/Literal quotes in titles.html (added)
-
spec/data/markdowntest/Literal quotes in titles.text (added)
-
spec/data/markdowntest/Markdown Documentation - Basics.html (added)
-
spec/data/markdowntest/Markdown Documentation - Basics.text (added)
-
spec/data/markdowntest/Markdown Documentation - Syntax.html (added)
-
spec/data/markdowntest/Markdown Documentation - Syntax.text (added)
-
spec/data/markdowntest/Nested blockquotes.html (added)
-
spec/data/markdowntest/Nested blockquotes.text (added)
-
spec/data/markdowntest/Ordered and unordered lists.html (added)
-
spec/data/markdowntest/Ordered and unordered lists.text (added)
-
spec/data/markdowntest/Strong and em together.html (added)
-
spec/data/markdowntest/Strong and em together.text (added)
-
spec/data/markdowntest/Tabs.html (added)
-
spec/data/markdowntest/Tabs.text (added)
-
spec/data/markdowntest/Tidyness.html (added)
-
spec/data/markdowntest/Tidyness.text (added)
-
spec/data/ml-announce.txt (added)
-
spec/data/re-overflow.txt (added)
-
spec/data/re-overflow2.txt (added)
-
spec/lib (added)
-
spec/lib/constants.rb (added)
-
spec/lib/matchers.rb (added)
-
spec/markdowntest_spec.rb (added)
-
spec/spec.opts (added)
-
test.rb (deleted)
-
tests/contrib.tests.rb (modified) (1 diff)
-
web/index.html (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk
- Property svn:externals set to
-
trunk/bin/bluecloth
r50 r86 65 65 66 66 bc = BlueCloth::new( contents.join ) 67 $stderr.puts "Using BlueCloth version #{BlueCloth::Version}" 67 68 68 69 if fragment -
trunk/lib/bluecloth.rb
r69 r86 6 6 # == Synopsis 7 7 # 8 # doc = BlueCloth ::new "8 # doc = BlueCloth.new " 9 9 # ## Test document ## 10 10 # … … 26 26 # 27 27 # Original version: 28 # Copyright (c) 200 3-2004 John Gruber28 # Copyright (c) 2004, 2005, John Gruber 29 29 # <http://daringfireball.net/> 30 30 # All rights reserved. 31 31 # 32 # Redistribution and use in source and binary forms, with or without 33 # modification, are permitted provided that the following conditions are 34 # met: 35 # 36 # * Redistributions of source code must retain the above copyright notice, 37 # this list of conditions and the following disclaimer. 38 # 39 # * Redistributions in binary form must reproduce the above copyright 40 # notice, this list of conditions and the following disclaimer in the 41 # documentation and/or other materials provided with the distribution. 42 # 43 # * Neither the name "Markdown" nor the names of its contributors may 44 # be used to endorse or promote products derived from this software 45 # without specific prior written permission. 46 # 47 # This software is provided by the copyright holders and contributors "as 48 # is" and any express or implied warranties, including, but not limited 49 # to, the implied warranties of merchantability and fitness for a 50 # particular purpose are disclaimed. In no event shall the copyright owner 51 # or contributors be liable for any direct, indirect, incidental, special, 52 # exemplary, or consequential damages (including, but not limited to, 53 # procurement of substitute goods or services; loss of use, data, or 54 # profits; or business interruption) however caused and on any theory of 55 # liability, whether in contract, strict liability, or tort (including 56 # negligence or otherwise) arising in any way out of the use of this 57 # software, even if advised of the possibility of such damage. 58 # 32 59 # Ruby port: 33 # Copyright (c) 2004 The FaerieMUD Consortium.60 # Copyright (c) 2004, 2005 The FaerieMUD Consortium. 34 61 # 35 # BlueCloth is free software; you can redistribute it and/or modify it under the36 # terms of the GNU General Public License as published by the Free Software37 # Foundation; either version 2 of the License, or (at your option) any later38 # version.39 # 40 # BlueCloth is distributed in the hope that it will be useful, but WITHOUT ANY41 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR42 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.62 # You may use, modify, and/or redistribute this software under the same terms 63 # as Ruby itself. A copy of Ruby's license should be included in this package; 64 # if not, it can be obtained online at: 65 # http://www.ruby-lang.org/en/LICENSE.txt. 66 # 67 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED 68 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 69 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 43 70 # 44 71 # == To-do … … 83 110 84 111 # Release Version 85 V ersion = '0.0.3'112 VERSION = '1.1.0' 86 113 87 114 # SVN Revision 88 S vnRev= %q$Rev$115 SVNREV = %q$Rev$ 89 116 90 117 # SVN Id tag 91 SvnId = %q$Id$ 92 93 # SVN URL 94 SvnUrl = %q$URL$ 118 SVNID = %q$Id$ 119 95 120 96 121 … … 98 123 # midway through a render. I prefer this to the globals of the Perl version 99 124 # because globals make me break out in hives. Or something. 100 RenderState = Struct ::new( "RenderState", :urls, :titles, :html_blocks, :log )125 RenderState = Struct.new( "RenderState", :urls, :titles, :html_blocks, :list_level, :log ) 101 126 102 127 # Tab width for #detab! if none is specified 103 TabWidth = 4 128 # :TODO: Make this DEFAULT_TAB_WIDTH and make tab width a per-instance setting instead. 129 TAB_WIDTH = 4 130 LESS_THAN_TAB_WIDTH = TAB_WIDTH - 1 104 131 105 132 # The tag-closing string -- set to '>' for HTML 106 E mptyElementSuffix= "/>";133 EMPTY_ELEMENT_SUFFIX = "/>"; 107 134 108 135 # Table of MD5 sums for escaped characters 109 E scapeTable= {}110 '\\`*_{}[]() #.!'.split(//).each {|char|111 hash = Digest::MD5 ::hexdigest( char )112 113 E scapeTable[ char ] = {136 ESCAPE_TABLE = {} 137 '\\`*_{}[]()>#+-.!'.split(//).each {|char| 138 hash = Digest::MD5.hexdigest( char ) 139 140 ESCAPE_TABLE[ char ] = { 114 141 :md5 => hash, 115 :md5re => Regexp ::new( hash ),116 :re => Regexp ::new( '\\\\' + Regexp::escape(char) ),142 :md5re => Regexp.new( hash ), 143 :re => Regexp.new( '\\\\' + Regexp.escape(char) ), 117 144 } 118 145 } … … 125 152 ### Create a new BlueCloth string. 126 153 def initialize( content="", *restrictions ) 127 @log = Logger ::new( $deferr )154 @log = Logger.new( $deferr ) 128 155 @log.level = $DEBUG ? 129 156 Logger::DEBUG : … … 164 191 165 192 # Create a StringScanner we can reuse for various lexing tasks 166 @scanner = StringScanner ::new( '' )193 @scanner = StringScanner.new( '' ) 167 194 168 195 # Make a structure to carry around stuff that gets placeholdered out of 169 196 # the source. 170 rs = RenderState ::new( {}, {}, {})197 rs = RenderState.new( {}, {}, {}, 0 ) 171 198 172 199 # Make a copy of the string with normalized line endings, tabs turned to … … 214 241 215 242 ### Convert tabs in +str+ to spaces. 216 def detab( tabwidth=T abWidth)243 def detab( tabwidth=TAB_WIDTH ) 217 244 copy = self.dup 218 245 copy.detab!( tabwidth ) … … 222 249 223 250 ### Convert tabs to spaces in place and return self if any were converted. 224 def detab!( tabwidth=T abWidth)251 def detab!( tabwidth=TAB_WIDTH ) 225 252 newstr = self.split( /\n/ ).collect {|line| 226 253 line.gsub( /(.*?)\t/ ) do … … 247 274 text = transform_code_blocks( text, rs ) 248 275 text = transform_block_quotes( text, rs ) 249 text = transform_auto_links( text, rs )250 276 text = hide_html_blocks( text, rs ) 251 277 … … 263 289 264 290 str = transform_code_spans( str, rs ) 265 str = e ncode_html( str )291 str = escape_special_chars( str ) 266 292 str = transform_images( str, rs ) 267 293 str = transform_anchors( str, rs ) 294 str = transform_auto_links( str, rs ) 295 str = encode_html( str ) 268 296 str = transform_italic_and_bold( str, rs ) 269 297 270 298 # Hard breaks 271 str.gsub!( / {2,}\n/, "<br#{E mptyElementSuffix}\n" )299 str.gsub!( / {2,}\n/, "<br#{EMPTY_ELEMENT_SUFFIX}\n" ) 272 300 273 301 @log.debug "Done with span transforms:\n %p" % str … … 320 348 ) 321 349 ( # save in $2 322 [ ] *# Any spaces350 [ ]{0,#{LESS_THAN_TAB_WIDTH}} # Any spaces 323 351 <hr # Tag open 324 352 \b # Word break … … 329 357 }ix 330 358 359 # Special case for standalone HTML comments 360 CommentBlockRegex = %r{ 361 ( # $1 362 \A\n? # Start of doc + optional \n 363 | # or 364 .*\n\n # anything + blank line 365 ) 366 ( # save in $2 367 [ ]{0,#{LESS_THAN_TAB_WIDTH}} # Any spaces 368 (?: 369 <! 370 (--.*?--\s*)+ 371 > 372 ) 373 $ # followed by a blank line or end of document 374 ) 375 }ix 376 331 377 ### Replace all blocks of HTML in +str+ that start in the left margin with 332 378 ### tokens. … … 336 382 # Tokenizer proc to pass to gsub 337 383 tokenize = lambda {|match| 338 key = Digest::MD5 ::hexdigest( match )384 key = Digest::MD5.hexdigest( match ) 339 385 rs.html_blocks[ key ] = match 340 386 @log.debug "Replacing %p with %p" % [ match, key ] … … 353 399 rval.gsub!( HruleBlockRegex ) {|match| $1 + tokenize[$2] } 354 400 401 @log.debug "Finding comments..." 402 rval.gsub!( CommentBlockRegex ) {|match| $1 + tokenize[$2] } 403 355 404 return rval 356 405 end … … 359 408 # Link defs are in the form: ^[id]: url "optional title" 360 409 LinkRegex = %r{ 361 ^[ ] *\[(.+)\]: # id = $1410 ^[ ]{0,#{LESS_THAN_TAB_WIDTH}}\[(.+)\]: # id = $1 362 411 [ ]* 363 412 \n? # maybe *one* newline … … 397 446 text = '' 398 447 399 # The original Markdown source has something called '$tags_to_skip' 400 # declared here, but it's never used, so I don't define it. 401 448 # Split the HTML into tags and text, calling back into this block for 449 # each chunk. 402 450 tokenize_html( str ) {|token, str| 403 451 @log.debug " Adding %p token %p" % [ token, str ] … … 407 455 when :tag 408 456 text += str. 409 gsub( /\*/, E scapeTable['*'][:md5] ).410 gsub( /_/, E scapeTable['_'][:md5] )457 gsub( /\*/, ESCAPE_TABLE['*'][:md5] ). 458 gsub( /_/, ESCAPE_TABLE['_'][:md5] ) 411 459 412 460 # Encode backslashed stuff in regular text … … 426 474 ### it. 427 475 def unescape_special_chars( str ) 428 E scapeTable.each {|char, hash|476 ESCAPE_TABLE.each {|char, hash| 429 477 @log.debug "Unescaping escaped %p with %p" % [ char, hash[:md5re] ] 430 478 str.gsub!( hash[:md5re], char ) … … 439 487 def encode_backslash_escapes( str ) 440 488 # Make a copy with any double-escaped backslashes encoded 441 text = str.gsub( /\\\\/, E scapeTable['\\'][:md5] )489 text = str.gsub( /\\\\/, ESCAPE_TABLE['\\'][:md5] ) 442 490 443 E scapeTable.each_pair {|char, esc|491 ESCAPE_TABLE.each_pair {|char, esc| 444 492 next if char == '\\' 445 493 text.gsub!( esc[:re], esc[:md5] ) … … 454 502 def transform_hrules( str, rs ) 455 503 @log.debug " Transforming horizontal rules" 456 str.gsub( /^ ( ?[\-\*_] ?){3,}$/, "\n<hr#{EmptyElementSuffix}\n" )504 str.gsub( /^[ ]{0,2}( ?[\-\*_] ?){3,} *$/, "\n<hr#{EMPTY_ELEMENT_SUFFIX}\n" ) 457 505 end 458 506 … … 462 510 ListMarkerOl = %r{\d+\.} 463 511 ListMarkerUl = %r{[*+-]} 464 ListMarkerAny = Regexp::union( ListMarkerOl, ListMarkerUl ) 465 466 ListRegexp = %r{ 467 (?: 468 ^[ ]{0,#{TabWidth - 1}} # Indent < tab width 469 (#{ListMarkerAny}) # unordered or ordered ($1) 470 [ ]+ # At least one space 471 ) 472 (?m:.+?) # item content (include newlines) 473 (?: 474 \z # Either EOF 475 | # or 476 \n{2,} # Blank line... 477 (?=\S) # ...followed by non-space 478 (?![ ]* # ...but not another item 479 (#{ListMarkerAny}) 480 [ ]+) 481 ) 512 ListMarkerAny = Regexp.union( ListMarkerOl, ListMarkerUl ) 513 514 # Part of list-pattern common to both first-level and n-level lists 515 ListBodyPattern = %Q{ 516 (?: 517 [ ]{0,#{LESS_THAN_TAB_WIDTH}} # Indent < tab width 518 (#{ListMarkerAny}) # $3 (see below): unordered or ordered 519 [ ]+ # At least one space 520 ) 521 (?m:.+?) # item content (include newlines) 522 (?: 523 \\z # Either EOF 524 | # or 525 \\n{2,} # Blank line... 526 (?=\S) # ...followed by non-space 527 (?![ ]* # ...but not another item 528 (#{ListMarkerAny}) 529 [ ]+) 530 ) 531 } 532 533 # Regexp to match first-level lists 534 OuterListRegexp = %r{ 535 ( # $1 536 \A\n? # Start of doc + optional \n 537 | # or 538 .*\n\n # anything + blank line 539 ) 540 (#{ListBodyPattern}) # $2 541 }x 542 543 # Regexp to match n-level lists 544 InnerListRegexp = %r{ 545 (^) # $1 546 (#{ListBodyPattern}) # $2 482 547 }x 483 548 … … 487 552 @log.debug " Transforming lists at %p" % (str[0,100] + '...') 488 553 489 str.gsub( ListRegexp ) {|list| 490 @log.debug " Found list %p" % list 491 bullet = $1 554 # Choose a regexp based on whether we're already in a list or not 555 re = if rs.list_level.zero? then OuterListRegexp else InnerListRegexp end 556 557 # Use the chosen regexp to find lists 558 str.gsub( re ) { 559 pre, list, bullet = $1, $2, $3 560 @log.debug " Found list bullet %p after %p: %p" % 561 [ bullet, pre, list ] 562 492 563 list_type = (ListMarkerUl.match(bullet) ? "ul" : "ol") 493 564 list.gsub!( /\n{2,}/, "\n\n\n" ) 494 565 495 %{<%s>\n%s</%s>\n} % [ 566 %{%s<%s>\n%s</%s>\n} % [ 567 pre, 496 568 list_type, 497 569 transform_list_items( list, rs ), … … 516 588 @log.debug " Transforming list items" 517 589 590 # Increment the marker for parsing sublists 591 rs.list_level += 1 592 518 593 # Trim trailing blank lines 519 594 str = str.sub( /\n{2,}\z/, "\n" ) … … 535 610 %{<li>%s</li>\n} % item 536 611 } 612 ensure 613 # Decrement the list-level counter 614 rs.list_level -= 1 537 615 end 538 616 … … 543 621 ( # $1 = the code block 544 622 (?: 545 (?:[ ]{#{T abWidth}} | \t) # a tab or tab-width of spaces623 (?:[ ]{#{TAB_WIDTH}} | \t) # a tab or tab-width of spaces 546 624 .*\n+ 547 625 )+ 548 626 ) 549 (^[ ]{0,#{T abWidth- 1}}\S|\Z) # Lookahead for non-space at627 (^[ ]{0,#{TAB_WIDTH - 1}}\S|\Z) # Lookahead for non-space at 550 628 # line-start, or end of doc 551 629 }x … … 589 667 quote.gsub!( /^ +$/, '' ) # Trim whitespace-only lines 590 668 591 indent = " " * T abWidth669 indent = " " * TAB_WIDTH 592 670 quoted = %{<blockquote>\n%s\n</blockquote>\n\n} % 593 671 apply_block_transforms( quote, rs ). … … 770 848 771 849 # Scan the whole string 772 until @scanner.e mpty?850 until @scanner.eos? 773 851 774 852 if @scanner.scan( /\[/ ) … … 857 935 end 858 936 859 end # until @scanner.e mpty?937 end # until @scanner.eos? 860 938 861 939 return text … … 899 977 900 978 # Scan to the end of the string 901 until @scanner.e mpty?979 until @scanner.eos? 902 980 903 981 # Scan up to an opening backtick … … 909 987 opener = @scanner.scan( /`+/ ) 910 988 len = opener.length 911 closer = Regexp ::new( opener )989 closer = Regexp.new( opener ) 912 990 @log.debug "Scanning for end of code span with %p" % closer 913 991 … … 916 994 # whitespace, and encode any enitites contained in it. 917 995 codespan = @scanner.scan_until( closer ) or 918 raise FormatError ::new( @scanner.rest[0,20],996 raise FormatError.new( @scanner.rest[0,20], 919 997 "No %p found before end" % opener ) 920 998 … … 967 1045 ### Turn image markup into image tags. 968 1046 def transform_images( str, rs ) 969 @log.debug " Transforming images " % str1047 @log.debug " Transforming images (%p)" % [str] 970 1048 971 1049 # Handle reference-style labeled images: ![alt text][id] … … 989 1067 result += %{ title="%s"} % escape_md( rs.titles[linkid] ) 990 1068 end 991 result += E mptyElementSuffix1069 result += EMPTY_ELEMENT_SUFFIX 992 1070 993 1071 else … … 1012 1090 result += %{ title="%s"} % escape_md( title ) 1013 1091 end 1014 result += E mptyElementSuffix1092 result += EMPTY_ELEMENT_SUFFIX 1015 1093 1016 1094 @log.debug "Replacing %p with %p" % [ match, result ] … … 1029 1107 gsub( %r{<}, '<' ). 1030 1108 gsub( %r{>}, '>' ). 1031 gsub( CodeEscapeRegexp ) {|match| E scapeTable[match][:md5]}1109 gsub( CodeEscapeRegexp ) {|match| ESCAPE_TABLE[match][:md5]} 1032 1110 end 1033 1111 … … 1042 1120 def escape_md( str ) 1043 1121 str. 1044 gsub( /\*/, E scapeTable['*'][:md5] ).1045 gsub( /_/, E scapeTable['_'][:md5] )1122 gsub( /\*/, ESCAPE_TABLE['*'][:md5] ). 1123 gsub( /_/, ESCAPE_TABLE['_'][:md5] ) 1046 1124 end 1047 1125 … …
