4 # Refer to https://github.github.com/gfm/
5 # for the GitHub Flavored Markdown specification.
7 # TODO: use a different highlight for each heading level
# Vim regular expressions for the block-level constructs.
# block quote marker: up to 3 leading spaces, then '>' and an optional space
13 var block_quote = '^ \{,3\}\zs> \='
# list marker: a bullet (-, + or *) or 1 to 9 digits followed by '.' or ')'
14 var list_marker = '[-+*]\|[0-9]\{1,9}[.)]'
# start of a list item, one of: a marker at the start of the line followed only
# by whitespace; a marker indented by at most 3 spaces and followed by 1 to 4
# spaces and a non-blank; or such a marker followed by 5 spaces (only those 5
# are part of the match) before the text
15 var list_item = $'^\%({list_marker}\)\ze\s*$\|^ \{{,3}}\zs\%({list_marker}\) \{{1,4}}\ze\S\|^ \{{,3}}\zs\%({list_marker}\) \{{5}}\ze\s*\S'
16 # exported pattern matching the leading spaces, the list marker and the spaces after it
17 export var list_pattern = $'^ *\%({list_marker}\) *'
# empty line or line with only whitespace
21 var blank_line = '^\s*$'
# thematic break: up to 3 leading spaces, then the same character (-, _ or *)
# at least three times, optionally separated by whitespace
22 var thematic_break = '^ \{,3\}\([-_*]\)\%(\s*\1\)\{2,\}\s*$'
# code fence: up to 3 leading spaces, 3 or more backticks or tildes (captured),
# optional whitespace and a run of non-blank characters (captured)
23 var code_fence = '^ \{,3\}\(`\{3,\}\|\~\{3,\}\)\s*\(\S*\)'
# indented code: the match is what follows the first 4 spaces and must contain
# a non-blank character
24 var code_indent = '^ \{4\}\zs\s*\S.*'
# paragraph text: the line without its leading whitespace
25 var paragraph = '^\s*\zs\S.\{-}\s*\ze$'
# ATX heading: 1 to 6 '#' (captured), a space and the heading text (captured);
# an optional closing run of '#' is not part of the match
27 var atx_heading = '^ \{,3}\zs\(#\{1,6}\) \(.\{-}\)\ze\%( #\{1,}\s*\)\=$'
# setext heading underline: a run of '=' or a run of '-'
28 var setext_heading = '^ \{,3}\zs\%(=\{1,}\|-\{1,}\)\ze *$'
# heading level selected by the underline character
29 var setext_heading_level = {"=": 1, "-": 2}
# table delimiter row: cells of dashes with optional ':' on either side,
# separated by '|'; an outer '|' at either end is not part of the match
31 var table_delimiter = '^|\=\zs *:\=-\{1,}:\= *\%(| *:\=-\{1,}:\= *\)*\ze|\=$'
# character class of the ASCII punctuation characters
33 var punctuation = "[!\"#$%&'()*+,-./:;<=>?@[\\\\\\\]^_`{|}~]"
35 # Highlight groups and text property types used to render markdown
# highlight groups that only set the bold, italic or strikethrough attribute
36 highlight LspBold term=bold cterm=bold gui=bold
37 highlight LspItalic term=italic cterm=italic gui=italic
38 highlight LspStrikeThrough term=strikethrough cterm=strikethrough gui=strikethrough
# one text property type per markdown element, each tied to a highlight group
39 prop_type_add('LspMarkdownBold', {highlight: 'LspBold'})
40 prop_type_add('LspMarkdownItalic', {highlight: 'LspItalic'})
41 prop_type_add('LspMarkdownStrikeThrough', {highlight: 'LspStrikeThrough'})
42 prop_type_add('LspMarkdownHeading', {highlight: 'Function'})
43 prop_type_add('LspMarkdownCode', {highlight: 'PreProc'})
44 prop_type_add('LspMarkdownCodeBlock', {highlight: 'PreProc'})
45 prop_type_add('LspMarkdownListMarker', {highlight: 'Special'})
46 prop_type_add('LspMarkdownTableHeader', {highlight: 'Label'})
47 prop_type_add('LspMarkdownTableMarker', {highlight: 'Special'})
# Return the text property dict for the markdown element named by "marker".
# The element name selects the text property type, e.g. 'list_item' uses
# 'LspMarkdownListMarker' and 'strong' uses 'LspMarkdownBold'.
# NOTE(review): "col" is presumably the 1-based start column and "opt" the
# length (or line range for code blocks) of the property - confirm against
# the callers.
50 def GetMarkerProp(marker: string, col: number, ...opt: list<any>): dict<any>
51 if marker == 'list_item'
53 type: 'LspMarkdownListMarker',
57 elseif marker == 'code_block'
59 type: 'LspMarkdownCodeBlock',
64 elseif marker == 'heading'
66 type: 'LspMarkdownHeading',
70 elseif marker == 'table_header'
72 type: 'LspMarkdownTableHeader',
76 elseif marker == 'table_sep'
78 type: 'LspMarkdownTableMarker',
82 elseif marker == 'code_span'
84 type: 'LspMarkdownCode',
88 elseif marker == 'emphasis'
90 type: 'LspMarkdownItalic',
94 elseif marker == 'strong'
96 type: 'LspMarkdownBold',
100 elseif marker == 'strikethrough'
102 type: 'LspMarkdownStrikeThrough',
# Find the code spans in "text".
# For a span, "start" is [offset of the opening backticks, offset of the code
# text] and "end" is [end of the code text, end of the closing backticks],
# taken from the two matches below.
110 def GetCodeSpans(text: string): list<dict<any>>
113 while pos < text->len()
# next backtick together with the backslashes directly in front of it
114 var backtick = text->matchstrpos('\\*`', pos)
# an even match length means an odd number of backslashes: escaped backtick
118 if backtick[0]->len() % 2 == 0
# continue at the backtick itself (last character of the match)
123 pos = backtick[2] - 1
# the whole span: a backtick run up to the next backtick run of equal length
124 var code_span = text->matchstrpos('^\(`\+\)`\@!.\{-}`\@1<!\1`\@!', pos)
# the same span, but the match is reduced to the code text between the runs
128 var code_text = text->matchstrpos('^\(`\+\)\%(\zs \+\ze\|\([ \n]\=\)\zs.\{-}\S.\{-}\ze\2\)`\@1<!\1`\@!', pos)
131 start: [code_span[1], code_text[1]],
132 end: [code_text[2], code_span[2]]
# Convert the markdown source of an inline fragment to display text.
# Inside a code span ("block_marker" is '`') only line breaks are replaced by
# spaces.  Otherwise line breaks are normalized and the backslash in front of
# escaped punctuation characters is removed.
139 def Unescape(text: string, block_marker: string = ""): string
140 if block_marker == '`'
141 # line breaks do not occur inside code spans
142 return text->substitute('\n', ' ', 'g')
144 # use 2 spaces instead of \ for hard line break
# NOTE(review): the replacement below shows a single space before \n, while
# the hard line break pattern further down needs 2 or more - confirm that the
# replacement really inserts two spaces.
145 var result = text->substitute('\\\@<!\(\(\\\\\)*\)\\\n', '\1 \n', 'g')
146 # replace soft line breaks with a single space
147 result = result->substitute(' \@<! \=\n', ' ', 'g')
148 # hard line breaks (2 or more spaces before the newline) become a plain newline
149 result = result->substitute(' \{2,}\n', '\n', 'g')
# drop the backslash in front of escaped punctuation characters
150 return result->substitute($'\\\({punctuation}\)', '\1', 'g')
# Find the next emphasis or strikethrough delimiter run ('_', '*' or '~') in
# "text".  A delimiter that is not found or starts after "end_pos" ends the
# search.  In the result "marker" is the text of the run and "start" is
# [start offset, end offset] of the run.
# NOTE(review): presumably the scan begins at "start_pos" and an empty dict is
# returned when nothing is found (ParseInlines tests delimiter->empty()) -
# confirm.
153 def GetNextInlineDelimiter(text: string, start_pos: number, end_pos: number): dict<any>
155 while pos < text->len()
156 # search for the first delimiter char, including the backslashes in front of it
157 var delimiter = text->matchstrpos('\\*[_*~]', pos)
158 if delimiter[1] < 0 || delimiter[1] > end_pos
# an even match length means an odd number of backslashes
161 if delimiter[0]->len() % 2 == 0
162 # escaped delimiter char
# continue at the delimiter char itself (last character of the match)
166 pos = delimiter[2] - 1
# the whole run of that delimiter char ('*' and '~' are escaped for the regex)
167 var delimiter_run = text->matchstrpos(
168 $'{delimiter[0][-1]->substitute("\\([*~]\\)", "\\\\\\1", "g")}\+',
# runs of more than two '~' are skipped
170 if delimiter_run[0][0] == '~' && delimiter_run[0]->len() > 2
171 pos = delimiter_run[2]
179 var delim_regex = delimiter_run[0]->substitute('\([*~]\)', '\\\1', 'g')
# left-flanking: not followed by whitespace, end of text or punctuation, or
# followed by punctuation and preceded by whitespace, start of text or
# punctuation
180 var is_left = text->match($'^{add_char}{delim_regex}\%(\s\|$\|{punctuation}\)\@!\|^{add_char}\%(\s\|^\|{punctuation}\)\@1<={delim_regex}{punctuation}', pos) >= 0
# right-flanking: not preceded by whitespace, start of text or punctuation, or
# preceded by punctuation and followed by whitespace, end of text or
# punctuation
181 var is_right = text->match($'^{add_char}\%(\s\|^\|{punctuation}\)\@1<!{delim_regex}\|^{add_char}{punctuation}\@1<={delim_regex}\%(\s\|$\|{punctuation}\)', pos) >= 0
# a run that is neither left- nor right-flanking is skipped
182 if !is_left && ! is_right
183 pos = delimiter_run[2]
186 if delimiter_run[0][0] == '_'
187 && text->match($'^\w{delimiter_run[0]}\w', pos) >= 0
188 # intraword emphasis is disallowed
189 pos = delimiter_run[2]
193 marker: delimiter_run[0],
194 start: [delimiter_run[1], delimiter_run[2]],
# Render the first inline block of "blocks" together with all blocks nested
# inside it.  Processed blocks are removed from "blocks".
# The result holds the unescaped "text", the text "props" and "end_pos", the
# offset in "text" after the closing delimiter of the block.
# NOTE(review): "rel_pos" is presumably the offset of the produced text within
# the rendered line, used for the property columns - confirm.
202 def GetNextInlineBlock(text: string, blocks: list<any>, rel_pos: number): dict<any>
207 var cur = blocks->remove(0)
# the content starts after the opening delimiter
208 var pos = cur.start[1]
# as long as the next block starts before the current one ends it is nested
209 while blocks->len() > 0 && cur.end[0] >= blocks[0].start[0]
# text in front of the nested block
210 result.text ..= Unescape(text->strpart(pos, blocks[0].start[0] - pos), cur.marker[0])
# the nested block, positioned after the text produced so far
212 var part = GetNextInlineBlock(text, blocks, rel_pos + result.text->len())
213 result.text ..= part.text
214 result.props += part.props
# remaining text up to the closing delimiter
217 result.text ..= Unescape(text->strpart(pos, cur.end[0] - pos), cur.marker[0])
218 # add props for current inline block
225 '~': 'strikethrough',
226 '~~': 'strikethrough'
228 result.props->insert(GetMarkerProp(prop_type[cur.marker],
231 result->extend({'end_pos': cur.end[1]})
# Parse the inline markup (code spans, emphasis, strong emphasis and
# strikethrough) of "text".  Code spans and matched delimiter pairs are
# collected in "seq" and then rendered with GetNextInlineBlock(); the text
# between them is passed through Unescape().
# NOTE(review): presumably returns "formatted" with the display text and the
# text properties, "rel_pos" being the offset of the text within the rendered
# line - confirm.
235 def ParseInlines(text: string, rel_pos: number = 0): dict<any>
240 var code_spans = GetCodeSpans(text)
244 # search for all emphasis delimiters
245 while pos < text->len()
# range of the next code span; delimiters are only searched in front of it
246 var code_pos: list<number>
247 if code_spans->len() > 0
248 code_pos = [code_spans[0].start[0], code_spans[0].end[1]]
249 if pos >= code_pos[0]
251 seq->add(code_spans->remove(0))
# no code span left: search up to the end of the text
255 code_pos = [text->len(), text->len()]
257 var delimiter = GetNextInlineDelimiter(text, pos, code_pos[0])
258 if delimiter->empty()
263 var idx = seq->len() - 1
# entries with another delimiter char or already closed are no candidates
265 if delimiter.marker[0] != seq[idx].marker[0]
266 || seq[idx]->has_key('end')
270 if delimiter.left || seq[idx].right
272 if (delimiter.marker->len() + seq[idx].marker->len()) % 3 == 0
273 && (delimiter.marker->len() % 3 > 0
274 || seq[idx].marker->len() % 3 > 0)
275 # invalid pair: the lengths sum to a multiple of 3 but not both are multiples of 3
# at most 2 delimiter chars are matched at a time
280 var marker_len = min([delimiter.marker->len(),
281 seq[idx].marker->len(), 2])
# the opener is longer than the matched part: split the matched part off
282 if seq[idx].marker->len() > marker_len
284 marker: delimiter.marker[0]->repeat(marker_len),
285 start: [seq[idx].start[1] - marker_len, seq[idx].start[1]],
289 seq[idx].marker = seq[idx].marker[: -1 - marker_len]
290 seq[idx].start[1] -= marker_len
291 seq[idx].right = false
293 seq->insert(new_delim, idx)
296 end: [delimiter.start[0],
297 delimiter.start[0] + marker_len]})
298 # close all overlapped emphasis spans that are still open
299 for i in range(seq->len() - 1, idx + 1, -1)
300 if !seq[i]->has_key('end')
# the closer is longer than the matched part: keep the rest for another match
304 if delimiter.marker->len() > marker_len
305 delimiter.start[0] += marker_len
307 delimiter.left = false
316 pos = delimiter.start[1]
# append the code spans that follow the last delimiter
318 while code_spans->len() > 0
319 seq->add(code_spans->remove(0))
321 # remove all unclosed delimiters
322 for i in range(seq->len() - 1, 0, -1)
323 if !seq[i]->has_key('end')
# plain text in front of the next inline block
331 if pos < seq[0].start[0]
332 formatted.text ..= Unescape(text->strpart(pos, seq[0].start[0] - pos))
333 pos = seq[0].start[0]
335 var inline = GetNextInlineBlock(text, seq,
336 rel_pos + formatted.text->len())
337 formatted.text ..= inline.text
338 formatted.props += inline.props
# plain text after the last inline block
342 formatted.text ..= Unescape(text->strpart(pos))
347 # create a new open container block
# "match" is a matchstrpos() result for a block quote or list item marker; a
# leading '>' in the matched text selects the block quote case.
# NOTE(review): "start_lnum" is presumably the first line of the block in the
# document - confirm.
348 def CreateContainerBlock(match: list<any>, start_lnum: number): dict<any>
349 if match[0][0] == '>'
# the non-blank part of the matched marker, surrounded by spaces
359 marker: $' {match[0]->matchstr("\\S\\+")} ',
365 # create a new open leaf block
# "block_type" is one of 'fenced_code', 'indented_code', 'paragraph', 'heading'
# or 'table'.  For indented code and paragraphs the text list starts with the
# part of "line" matched by the pattern of that block type.
# NOTE(review): "opt" presumably carries the heading level resp. the table
# delimiter row - confirm against the callers.
366 def CreateLeafBlock(block_type: string, line: string, ...opt: list<any>): dict<any>
367 if block_type == 'fenced_code'
# submatches of the opening fence line
368 var token = line->matchlist(code_fence)
375 elseif block_type == 'indented_code'
378 text: [line->matchstr(code_indent)]
380 elseif block_type == 'paragraph'
383 text: [line->matchstr(paragraph)]
385 elseif block_type == 'heading'
391 elseif block_type == 'table'
# Decide from the types of the previous block ("prev") and the current block
# ("cur") whether a blank line goes between them.  Thematic breaks ('hr'),
# headings and two paragraphs in a row are handled as separate cases.
402 def NeedBlankLine(prev: string, cur: string): bool
403 if prev == 'hr' || cur == 'hr'
405 elseif prev == 'heading' || cur == 'heading'
407 elseif prev == 'paragraph' && cur == 'paragraph'
# Split "line" (a dict with "text" and "props") at its first newline and
# recursively split the remainder.  The text of each following line is
# prefixed with "indent" spaces.  Text properties are distributed over the two
# lines; a property that spans the newline is divided into two properties.
415 def SplitLine(line: dict<any>, indent: number = 0): list<dict<any>>
416 var lines: list<dict<any>> = []
# index of the first newline
417 var pos = line.text->match('\n')
422 var cur_line: dict<any> = {
423 text: line.text[: pos],
426 var next_line: dict<any> = {
427 text: (' '->repeat(indent) .. line.text[pos + 1 :]),
430 for prop in line.props
# the property ends in front of the newline: it stays on the current line
431 if prop.col + prop.length - 1 < pos + 1
432 cur_line.props->add(prop)
# the property starts after the newline: it moves to the next line
433 elseif prop.col > pos + 1
434 prop.col -= pos - indent + 1
435 next_line.props->add(prop)
# otherwise the property spans the newline: one part for each line
437 cur_line.props->add({
440 length: pos - prop.col + 1
442 next_line.props->add({
445 length: prop.col + prop.length - pos - 2
# the remainder may contain further newlines
450 return lines + SplitLine(next_line, indent)
# type of blocks[0] recorded by the previous CloseBlocks() call; passed to
# NeedBlankLine() as the previous block type
453 var last_block: string = ''
# Close the open blocks from index "start" on: they are removed from "blocks"
# and their rendered lines are appended to document.content.  The markers of
# the container blocks form the prefix of the rendered line.  Code blocks
# whose language has a syntax file in 'runtimepath' are also recorded in
# document.syntax.
455 def CloseBlocks(document: dict<list<any>>, blocks: list<dict<any>>, start: number = 0): void
456 if start >= blocks->len()
459 var line: dict<any> = {
# separate from the previous content with a blank line where required
463 if !document.content->empty() && NeedBlankLine(last_block, blocks[0].type)
464 document.content->add({text: '', props: []})
466 last_block = blocks[0].type
# prefix from the blocks in front of "start", which are not closed here
468 for i in start->range()
469 if blocks[i]->has_key('marker')
470 if blocks[i].marker =~ '\S'
471 line.props->add(GetMarkerProp('list_item',
472 line.text->len() + 1,
473 blocks[i].marker->len()))
474 line.text ..= blocks[i].marker
# a marker is used once, afterwards it is replaced by spaces of equal length
475 blocks[i].marker = ' '->repeat(blocks[i].marker->len())
477 line.text ..= blocks[i].marker
# render the blocks that are closed, removing them from the list
481 for block in blocks->remove(start, -1)
482 if block.type =~ 'quote_block\|list_item'
483 if block->has_key('marker')
484 if block.marker =~ '\S'
485 line.props->add(GetMarkerProp('list_item',
486 line.text->len() + 1,
487 block.marker->len()))
488 line.text ..= block.marker
489 block.marker = ' '->repeat(block.marker->len())
491 line.text ..= block.marker
496 if block.type =~ '_code'
497 if block.type == 'indented_code'
# drop leading blank lines
498 while !block.text->empty() && block.text[0] !~ '\S'
499 block.text->remove(0)
# drop trailing blank lines
501 while !block.text->empty() && block.text[-1] !~ '\S'
502 block.text->remove(-1)
505 if !block.text->empty()
# following code lines are indented by the width of the prefix
506 var indent = ' '->repeat(line.text->len())
507 var max_len = mapnew(block.text, (_, l) => l->len())->max()
508 var text = block.text->remove(0)
510 document.content->add(line)
511 var startline = document.content->len()
513 document.content->add({text: indent .. l})
# syntax region for the code when a syntax file exists for the language
515 if block->has_key('language')
516 && !globpath(&rtp, $'syntax/{block.language}.vim')->empty()
517 document.syntax->add({lang: block.language,
518 start: $'\%{startline}l\%{indent->len() + 1}c',
519 end: $'\%{document.content->len()}l$'})
521 line.props->add(GetMarkerProp('code_block',
523 document.content->len(),
524 indent->len() + max_len + 1))
527 elseif block.type == 'heading'
528 line.props->add(GetMarkerProp('heading',
529 line.text->len() + 1,
532 var format = ParseInlines(block.text, line.text->len())
533 line.text ..= format.text
534 line.props += format.props
535 document.content += SplitLine(line)
536 elseif block.type == 'table'
537 var indent = line.text
# header cells are separated by '|' not preceded by a backslash
538 var head = block.header->split('\\\@1<!|')
539 var col1 = head->remove(0)
540 var format = ParseInlines(col1, line.text->len())
541 line.props->add(GetMarkerProp('table_header',
542 line.text->len() + 1,
544 line.text ..= format.text
545 line.props += format.props
547 format = ParseInlines(colx, line.text->len() + 1)
548 line.props->add(GetMarkerProp('table_sep', line.text->len() + 1, 1))
549 line.props->add(GetMarkerProp('table_header',
550 line.text->len() + 2,
552 line.text ..= $'|{format.text}'
553 line.props += format.props
555 document.content->add(line)
# the delimiter row
557 text: indent .. block.delimiter,
558 props: [GetMarkerProp('table_sep',
560 block.delimiter->len())]
562 document.content->add(data)
# the data rows
563 for row in block.text
568 var cell = row->split('\\\@1<!|')
569 col1 = cell->remove(0)
570 format = ParseInlines(col1, data.text->len())
571 data.text ..= format.text
572 data.props += format.props
574 format = ParseInlines(colx, data.text->len() + 1)
575 data.props->add(GetMarkerProp('table_sep',
576 data.text->len() + 1,
578 data.text ..= $'|{format.text}'
579 data.props += format.props
581 document.content->add(data)
583 elseif block.type == 'paragraph'
584 var indent = line.text->len()
# the paragraph lines are joined and trailing whitespace is removed before
# the inline markup is parsed
585 var format = ParseInlines(block.text->join("\n")->substitute('\s\+$', '', ''), indent)
586 line.text ..= format.text
587 line.props += format.props
588 document.content += SplitLine(line, indent)
# Expand the tabs in the leading block marker part of "line": a block quote
# marker with the blanks after it, or the indentation with an optional list
# marker and the blanks after it.  The rest of the line is appended unchanged.
594 def ExpandTabs(line: string): string
595 var block_marker = line->matchstrpos($'^ \{{,3}}>[ \t]\+\|^[ \t]*\%({list_marker}\)\=[ \t]*')
# the marker part contains no tab
596 if block_marker[0]->match('\t') < 0
599 var begin: string = ""
600 for char in block_marker[0]
# pad with spaces up to the next multiple of 4
602 begin ..= ' '->repeat(4 - (begin->strlen() % 4))
607 return begin .. line[block_marker[2] :]
# Parse the markdown lines in "data" into a document with two lists:
# "content" receives the rendered lines and "syntax" the regions of code
# blocks to be highlighted with the syntax of their language (both are filled
# by CloseBlocks()).  "width" is the length of the rule drawn for a thematic
# break.
610 export def ParseMarkdown(data: list<string>, width: number = 80): dict<list<any>>
611 var document: dict<list<any>> = {content: [], syntax: []}
612 var open_blocks: list<dict<any>> = []
615 var line: string = ExpandTabs(l)
618 # for each open block check whether the current line continues it
619 while cur < open_blocks->len()
620 if open_blocks[cur].type == 'quote_block'
621 var marker = line->matchstrpos(block_quote)
# strip the block quote marker
625 line = line->strpart(marker[2])
626 elseif open_blocks[cur].type == 'list_item'
# a list item is continued by a line indented by the indent of the item
627 var marker = line->matchstrpos($'^ \{{{open_blocks[cur].indent}}}')
631 line = line->strpart(marker[2])
632 elseif open_blocks[cur].type == 'fenced_code'
# closing fence: at least as long as the opening fence, same fence character
633 if line =~ $'^ \{{,3}}{open_blocks[cur].fence}{open_blocks[cur].fence[0]}* *$'
634 CloseBlocks(document, open_blocks, cur)
636 open_blocks[cur].text->add(line)
640 elseif open_blocks[cur].type == 'indented_code'
641 var marker = line->matchstrpos(code_indent)
643 open_blocks[cur].text->add(marker[0])
647 elseif open_blocks[cur].type == 'paragraph'
# a setext underline turns the paragraph into a heading
648 if line =~ setext_heading
649 var marker = line->matchstrpos(setext_heading)
650 open_blocks->add(CreateLeafBlock(
652 open_blocks->remove(cur).text->join("\n")->substitute('\s\+$', '', ''),
653 setext_heading_level[marker[0][0]]))
654 CloseBlocks(document, open_blocks, cur)
# a one-line paragraph followed by a delimiter row may be a table header
656 elseif open_blocks[cur].text->len() == 1
658 var marker = line->matchstr(table_delimiter)
# header row and delimiter row must have the same number of cells
660 if open_blocks[cur].text[0]->split('\\\@1<!|')->len() == marker->split('|')->len()
661 open_blocks->add(CreateLeafBlock(
663 open_blocks->remove(cur).text[0],
675 # the whole line is already consumed
679 # a thematic break closes all previous blocks
680 if line =~ thematic_break
681 CloseBlocks(document, open_blocks)
# draw the rule with box drawing characters when the encoding allows it
682 if &g:encoding == 'utf-8'
683 document.content->add({text: "\u2500"->repeat(width)})
685 document.content->add({text: '-'->repeat(width)})
691 # check for new container blocks
693 var block = line->matchstrpos($'{block_quote}\|{list_item}')
697 # close unmatched blocks
698 CloseBlocks(document, open_blocks, cur)
# NOTE(review): "document" is a dict with the keys content and syntax, so
# document->len() is the number of keys and not a line count - confirm
# whether document.content->len() was intended.
700 open_blocks->add(CreateContainerBlock(block, document->len()))
701 cur = open_blocks->len()
702 line = line->strpart(block[2])
705 # check for a leaf block
706 if line =~ code_fence
707 CloseBlocks(document, open_blocks, cur)
708 open_blocks->add(CreateLeafBlock('fenced_code', line))
709 elseif line =~ blank_line
710 if open_blocks->empty()
# a blank line ends a paragraph or a table but is kept in a code block
713 if open_blocks[-1].type == 'paragraph'
714 CloseBlocks(document, open_blocks, min([cur, open_blocks->len() - 1]))
715 elseif open_blocks[-1].type == 'table'
716 CloseBlocks(document, open_blocks, open_blocks->len() - 1)
717 elseif open_blocks[-1].type =~ '_code'
718 open_blocks[-1].text->add(line)
720 elseif line =~ code_indent
721 if open_blocks->empty()
722 open_blocks->add(CreateLeafBlock('indented_code', line))
723 elseif open_blocks[-1].type =~ '_code'
724 open_blocks[-1].text->add(line->matchstr(code_indent))
# an indented line after a paragraph continues the paragraph
725 elseif open_blocks[-1].type == 'paragraph'
726 open_blocks[-1].text->add(line->matchstr(paragraph))
728 CloseBlocks(document, open_blocks, cur)
729 open_blocks->add(CreateLeafBlock('indented_code', line))
730 elseif line =~ atx_heading
732 CloseBlocks(document, open_blocks, cur)
733 var token = line->matchlist(atx_heading)
# heading text and level (the number of '#'); the heading is closed at once
734 open_blocks->add(CreateLeafBlock('heading', token[2], token[1]->len()))
735 CloseBlocks(document, open_blocks, cur)
736 elseif !open_blocks->empty()
737 if open_blocks[-1].type == 'table'
738 open_blocks[-1].text->add(line)
739 elseif open_blocks[-1].type == 'paragraph'
740 open_blocks[-1].text->add(line->matchstr(paragraph))
742 CloseBlocks(document, open_blocks, cur)
743 open_blocks->add(CreateLeafBlock('paragraph', line))
746 open_blocks->add(CreateLeafBlock('paragraph', line))
# close the blocks still open at the end of the input
750 CloseBlocks(document, open_blocks)
754 # vim: tabstop=8 shiftwidth=2 softtabstop=2