Merge pull request #362 from rcasta74/md_parser

author Yegappan Lakshmanan <4298407+yegappan@users.noreply.github.com>

Mon, 24 Jul 2023 14:24:48 +0000 (07:24 -0700)

committer GitHub <noreply@github.com>

Mon, 24 Jul 2023 14:24:48 +0000 (07:24 -0700)
author Yegappan Lakshmanan <4298407+yegappan@users.noreply.github.com>
Mon, 24 Jul 2023 14:24:48 +0000 (07:24 -0700)
committer GitHub <noreply@github.com>
Mon, 24 Jul 2023 14:24:48 +0000 (07:24 -0700)
diff --git a/autoload/lsp/markdown.vim b/autoload/lsp/markdown.vim

index e6aa670dd65b05531c8e8d4862f0e6cdaf434a7a..baef36378a030a4c5580662d4259e2ea15822524 100644 (file)
--- a/autoload/lsp/markdown.vim
+++ b/autoload/lsp/markdown.vim
@@ -22,7 +22,7 @@ var blank_line = '^\s*$'
  var thematic_break = '^ \{,3\}\([-_*]\)\%(\s*\1\)\{2,\}\s*$'
  var code_fence = '^ \{,3\}\(`\{3,\}\|\~\{3,\}\)\s*\(\S*\)'
  var code_indent = '^ \{4\}\zs\s*\S.*'
-var paragraph = '^\s*\zs\S.\{-}\ze\s*$'
+var paragraph = '^\s*\zs\S.\{-}\s*\ze$'
  
  var atx_heading = '^ \{,3}\zs\(#\{1,6}\) \(.\{-}\)\ze\%( #\{1,}\s*\)\=$'
  var setext_heading = '^ \{,3}\zs\%(=\{1,}\|-\{1,}\)\ze *$'
@@ -125,7 +125,7 @@ def GetCodeSpans(text: string): list<dict<any>>
      if code_span[1] < 0
        break
      endif
-    var code_text = text->matchstrpos('^\(`\+\)\%(\zs \+\ze\|\( \=\)\zs.\{-}\S.\{-}\ze\2\)`\@1<!\1`\@!', pos)
+    var code_text = text->matchstrpos('^\(`\+\)\%(\zs \+\ze\|\([ \n]\=\)\zs.\{-}\S.\{-}\ze\2\)`\@1<!\1`\@!', pos)
      code_spans->add({
         marker: '`',
         start: [code_span[1], code_text[1]],
@@ -138,9 +138,16 @@ enddef
  
  def Unescape(text: string, block_marker: string = ""): string
    if block_marker == '`'
-    return text
+    # line breaks do not occur inside code spans
+    return text->substitute('\n', ' ', 'g')
    endif
-  return text->substitute($'\\\({punctuation}\)', '\1', 'g')
+  # use 2 spaces instead of \ for hard line break
+  var result = text->substitute('\\\@<!\(\(\\\\\)*\)\\\n', '\1  \n', 'g')
+  # change soft line breaks
+  result = result->substitute(' \@<! \=\n', ' ', 'g')
+  # change hard line breaks
+  result = result->substitute(' \{2,}\n', '\n', 'g')
+  return result->substitute($'\\\({punctuation}\)', '\1', 'g')
  enddef
  
  def GetNextInlineDelimiter(text: string, start_pos: number, end_pos: number): dict<any>
@@ -405,6 +412,44 @@ def NeedBlankLine(prev: string, cur: string): bool
    return false
  enddef
  
+# Split "line" (a dict with a "text" string and a "props" list of text
+# properties) at every newline found in its text.
+#
+# Arguments:
+#   line    dict with "text" and "props"; each property has "type", "col"
+#           (1-based byte column) and "length" (in bytes).
+#   indent  number of spaces prepended to every line after the first one.
+#
+# Returns a list with one {text, props} dict per resulting line.  When the
+# text has no newline the list holds "line" itself, untouched.  The newline
+# characters are not part of the returned texts.
+def SplitLine(line: dict<any>, indent: number = 0): list<dict<any>>
+  var text: string = line.text
+  # stridx() and strpart() both work with byte offsets, like the text
+  # property columns do, so multibyte text is split at the right place.
+  var pos: number = text->stridx("\n")
+  if pos < 0
+    return [line]
+  endif
+
+  var lines: list<dict<any>> = []
+  var props: list<dict<any>> = line.props
+  var pad: string = ' '->repeat(indent)
+
+  # Iterate instead of recursing once per line, so that a long paragraph
+  # cannot run into the 'maxfuncdepth' limit.
+  while pos >= 0
+    var cur_props: list<dict<any>> = []
+    var next_props: list<dict<any>> = []
+    for prop in props
+      # 1-based column of the last byte covered by the property; the
+      # newline itself sits at column "pos + 1".
+      var last: number = prop.col + prop.length - 1
+      if last <= pos
+        # ends before the newline: stays on the current line
+        cur_props->add(prop)
+      elseif prop.col > pos + 1
+        # starts after the newline: shift it onto the next line, without
+        # modifying the dict owned by the caller
+        next_props->add(prop->copy()->extend({col: prop.col - pos - 1 + indent}))
+      else
+        # covers the newline: keep the part before it on the current line
+        # and the part after it on the next line, dropping empty parts
+        if prop.col <= pos
+          cur_props->add({
+            type: prop.type,
+            col: prop.col,
+            length: pos - prop.col + 1
+          })
+        endif
+        if last > pos + 1
+          next_props->add({
+            type: prop.type,
+            col: indent + 1,
+            length: last - pos - 1
+          })
+        endif
+      endif
+    endfor
+    lines->add({text: text->strpart(0, pos), props: cur_props})
+    text = pad .. text->strpart(pos + 1)
+    props = next_props
+    pos = text->stridx("\n")
+  endwhile
+
+  lines->add({text: text, props: props})
+  return lines
+enddef
+
  var last_block: string = ''
  
  def CloseBlocks(document: dict<list<any>>, blocks: list<dict<any>>, start: number = 0): void
@@ -487,7 +532,7 @@ def CloseBlocks(document: dict<list<any>>, blocks: list<dict<any>>, start: numbe
         var format = ParseInlines(block.text, line.text->len())
         line.text ..= format.text
         line.props += line.props
-       document.content->add(line)
+       document.content += SplitLine(line)
        elseif block.type == 'table'
         var indent = line.text
         var head = block.header->split('\\\@1<!|')
@@ -536,21 +581,38 @@ def CloseBlocks(document: dict<list<any>>, blocks: list<dict<any>>, start: numbe
           document.content->add(data)
         endfor
        elseif block.type == 'paragraph'
-       var format = ParseInlines(block.text->join(' '), line.text->len())
+       var indent = line.text->len()
+       var format = ParseInlines(block.text->join("\n")->substitute('\s\+$', '', ''), indent)
         line.text ..= format.text
         line.props += format.props
-       document.content->add(line)
+       document.content += SplitLine(line, indent)
        endif
      endif
    endfor
  enddef
  
+# Replace the tabs found in the leading block marker of "line" with spaces.
+#
+# The leading part is whatever the pattern below matches at the start of the
+# line: either a ">" marker with the white space after it, or leading white
+# space with an optional list marker and the white space after it.  Each tab
+# in that part is expanded to the next multiple of 4 columns; the rest of the
+# line is returned unchanged.  A line without a tab in its leading part is
+# returned as is.
+def ExpandTabs(line: string): string
+  # matchstrpos() returns [matched text, start byte index, end byte index]
+  var block_marker = line->matchstrpos($'^ \{{,3}}>[ \t]\+\|^[ \t]*\%({list_marker}\)\=[ \t]*')
+  if block_marker[0]->match('\t') < 0
+    return line
+  endif
+  var begin: string = ""
+  for char in block_marker[0]
+    # compare against an escaped tab, so the test does not depend on a
+    # literal tab character surviving in the source file
+    if char == "\t"
+      # pad up to the next tab stop (every 4 columns)
+      begin ..= ' '->repeat(4 - (begin->strlen() % 4))
+    else
+      begin ..= char
+    endif
+  endfor
+  # block_marker[2] is a byte index, so use the byte based strpart()
+  return begin .. line->strpart(block_marker[2])
+enddef
+
  export def ParseMarkdown(data: list<string>, width: number = 80): dict<list<any>>
    var document: dict<list<any>> = {content: [], syntax: []}
    var open_blocks: list<dict<any>> = []
  
    for l in data
-    var line: string = l
+    var line: string = ExpandTabs(l)
      var cur = 0
  
      # for each open block check if current line continue it
@@ -587,8 +649,8 @@ export def ParseMarkdown(data: list<string>, width: number = 80): dict<list<any>
           var marker = line->matchstrpos(setext_heading)
           open_blocks->add(CreateLeafBlock(
                                   'heading',
-                                 open_blocks->remove(cur).text->join(' '),
-                                 setext_heading_level[marker[0]]))
+                                 open_blocks->remove(cur).text->join("\n")->substitute('\s\+$', '', ''),
+                                 setext_heading_level[marker[0][0]]))
           CloseBlocks(document, open_blocks, cur)
           cur = -1
         elseif open_blocks[cur].text->len() == 1
author	Yegappan Lakshmanan <4298407+yegappan@users.noreply.github.com>
	Mon, 24 Jul 2023 14:24:48 +0000 (07:24 -0700)
committer	GitHub <noreply@github.com>
	Mon, 24 Jul 2023 14:24:48 +0000 (07:24 -0700)