| 1 | // Copyright 2020 The Go Authors. All rights reserved. |
|---|---|
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // Present2md converts legacy-syntax present files to Markdown-syntax present files. |
| 6 | // |
| 7 | // Usage: |
| 8 | // |
| 9 | // present2md [-w] [file ...] |
| 10 | // |
| 11 | // By default, present2md prints the Markdown-syntax form of each input file to standard output. |
| 12 | // If no input file is listed, standard input is used. |
| 13 | // |
| 14 | // The -w flag causes present2md to update the files in place, overwriting each with its |
| 15 | // Markdown-syntax equivalent. |
| 16 | // |
| 17 | // Examples |
| 18 | // |
| 19 | // present2md your.article |
| 20 | // present2md -w *.article |
| 21 | package main |
| 22 | |
| 23 | import ( |
| 24 | "bytes" |
| 25 | "flag" |
| 26 | "fmt" |
| 27 | "io" |
| 28 | "io/ioutil" |
| 29 | "log" |
| 30 | "net/url" |
| 31 | "os" |
| 32 | "strings" |
| 33 | "unicode" |
| 34 | "unicode/utf8" |
| 35 | |
| 36 | "golang.org/x/tools/present" |
| 37 | ) |
| 38 | |
// usage writes a one-line usage summary to standard error and then
// terminates the process with exit status 2.
func usage() {
	const summary = "usage: present2md [-w] [file ...]\n"
	fmt.Fprint(os.Stderr, summary)
	os.Exit(2)
}
| 43 | |
// writeBack holds the value of the -w flag: when set, each converted
// file is written over its original instead of being printed.
var writeBack = flag.Bool("w", false, "write conversions back to original files")

// exitStatus is the status the process exits with after handling its
// arguments; it starts at 0 and is set to 1 when a file fails.
var exitStatus = 0
| 48 | |
| 49 | func main() { |
| 50 | log.SetPrefix("present2md: ") |
| 51 | log.SetFlags(0) |
| 52 | flag.Usage = usage |
| 53 | flag.Parse() |
| 54 | |
| 55 | args := flag.Args() |
| 56 | if len(args) == 0 { |
| 57 | if *writeBack { |
| 58 | log.Fatalf("cannot use -w with standard input") |
| 59 | } |
| 60 | convert(os.Stdin, "stdin", false) |
| 61 | return |
| 62 | } |
| 63 | |
| 64 | for _, arg := range args { |
| 65 | f, err := os.Open(arg) |
| 66 | if err != nil { |
| 67 | log.Print(err) |
| 68 | exitStatus = 1 |
| 69 | continue |
| 70 | } |
| 71 | err = convert(f, arg, *writeBack) |
| 72 | f.Close() |
| 73 | if err != nil { |
| 74 | log.Print(err) |
| 75 | exitStatus = 1 |
| 76 | } |
| 77 | } |
| 78 | os.Exit(exitStatus) |
| 79 | } |
| 80 | |
| 81 | // convert reads the data from r, parses it as legacy present, |
| 82 | // and converts it to Markdown-enabled present. |
| 83 | // If any errors occur, the data is reported as coming from file. |
| 84 | // If writeBack is true, the converted version is written back to file. |
| 85 | // If writeBack is false, the converted version is printed to standard output. |
| 86 | func convert(r io.Reader, file string, writeBack bool) error { |
| 87 | data, err := ioutil.ReadAll(r) |
| 88 | if err != nil { |
| 89 | return err |
| 90 | } |
| 91 | if bytes.HasPrefix(data, []byte("# ")) { |
| 92 | return fmt.Errorf("%v: already markdown", file) |
| 93 | } |
| 94 | |
| 95 | // Convert all comments before parsing the document. |
| 96 | // The '//' comment is treated as normal text and so |
| 97 | // is passed through the translation unaltered. |
| 98 | data = bytes.Replace(data, []byte("\n#"), []byte("\n//"), -1) |
| 99 | |
| 100 | doc, err := present.Parse(bytes.NewReader(data), file, 0) |
| 101 | if err != nil { |
| 102 | return err |
| 103 | } |
| 104 | |
| 105 | // Title and Subtitle, Time, Tags. |
| 106 | var md bytes.Buffer |
| 107 | fmt.Fprintf(&md, "# %s\n", doc.Title) |
| 108 | if doc.Subtitle != "" { |
| 109 | fmt.Fprintf(&md, "%s\n", doc.Subtitle) |
| 110 | } |
| 111 | if !doc.Time.IsZero() { |
| 112 | fmt.Fprintf(&md, "%s\n", doc.Time.Format("2 Jan 2006")) |
| 113 | } |
| 114 | if len(doc.Tags) > 0 { |
| 115 | fmt.Fprintf(&md, "Tags: %s\n", strings.Join(doc.Tags, ", ")) |
| 116 | } |
| 117 | |
| 118 | // Summary, defaulting to first paragraph of section. |
| 119 | // (Summaries must be explicit for Markdown-enabled present, |
| 120 | // and the expectation is that they will be shorter than the |
| 121 | // whole first paragraph. But this is what the blog does today.) |
| 122 | if strings.HasSuffix(file, ".article") && len(doc.Sections) > 0 { |
| 123 | for _, elem := range doc.Sections[0].Elem { |
| 124 | text, ok := elem.(present.Text) |
| 125 | if !ok || text.Pre { |
| 126 | // skip everything but non-text elements |
| 127 | continue |
| 128 | } |
| 129 | fmt.Fprintf(&md, "Summary:") |
| 130 | for i, line := range text.Lines { |
| 131 | fmt.Fprintf(&md, " ") |
| 132 | printStyled(&md, line, i == 0) |
| 133 | } |
| 134 | fmt.Fprintf(&md, "\n") |
| 135 | break |
| 136 | } |
| 137 | } |
| 138 | |
| 139 | // Authors |
| 140 | for _, a := range doc.Authors { |
| 141 | fmt.Fprintf(&md, "\n") |
| 142 | for _, elem := range a.Elem { |
| 143 | switch elem := elem.(type) { |
| 144 | default: |
| 145 | // Can only happen if this type switch is incomplete, which is a bug. |
| 146 | log.Fatalf("%s: unexpected author type %T", file, elem) |
| 147 | case present.Text: |
| 148 | for _, line := range elem.Lines { |
| 149 | fmt.Fprintf(&md, "%s\n", markdownEscape(line, true)) |
| 150 | } |
| 151 | case present.Link: |
| 152 | fmt.Fprintf(&md, "%s\n", markdownEscape(elem.Label, true)) |
| 153 | } |
| 154 | } |
| 155 | } |
| 156 | |
| 157 | // Invariant: the output ends in non-blank line now, |
| 158 | // and after printing any piece of the file below, |
| 159 | // the output should still end in a non-blank line. |
| 160 | // If a blank line separator is needed, it should be printed |
| 161 | // before the block that needs separating, not after. |
| 162 | |
| 163 | if len(doc.TitleNotes) > 0 { |
| 164 | fmt.Fprintf(&md, "\n") |
| 165 | for _, line := range doc.TitleNotes { |
| 166 | fmt.Fprintf(&md, ": %s\n", line) |
| 167 | } |
| 168 | } |
| 169 | |
| 170 | if len(doc.Sections) == 1 && strings.HasSuffix(file, ".article") { |
| 171 | // Blog drops section headers when there is only one section. |
| 172 | // Don't print a title in this case, to make clear that it's being dropped. |
| 173 | fmt.Fprintf(&md, "\n##\n") |
| 174 | printSectionBody(file, 1, &md, doc.Sections[0].Elem) |
| 175 | } else { |
| 176 | for _, s := range doc.Sections { |
| 177 | fmt.Fprintf(&md, "\n") |
| 178 | fmt.Fprintf(&md, "## %s\n", markdownEscape(s.Title, false)) |
| 179 | printSectionBody(file, 1, &md, s.Elem) |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | if !writeBack { |
| 184 | os.Stdout.Write(md.Bytes()) |
| 185 | return nil |
| 186 | } |
| 187 | return ioutil.WriteFile(file, md.Bytes(), 0666) |
| 188 | } |
| 189 | |
| 190 | func printSectionBody(file string, depth int, w *bytes.Buffer, elems []present.Elem) { |
| 191 | for _, elem := range elems { |
| 192 | switch elem := elem.(type) { |
| 193 | default: |
| 194 | // Can only happen if this type switch is incomplete, which is a bug. |
| 195 | log.Fatalf("%s: unexpected present element type %T", file, elem) |
| 196 | |
| 197 | case present.Text: |
| 198 | fmt.Fprintf(w, "\n") |
| 199 | lines := elem.Lines |
| 200 | for len(lines) > 0 && lines[0] == "" { |
| 201 | lines = lines[1:] |
| 202 | } |
| 203 | if elem.Pre { |
| 204 | for _, line := range strings.Split(strings.TrimRight(elem.Raw, "\n"), "\n") { |
| 205 | if line == "" { |
| 206 | fmt.Fprintf(w, "\n") |
| 207 | } else { |
| 208 | fmt.Fprintf(w, "\t%s\n", line) |
| 209 | } |
| 210 | } |
| 211 | } else { |
| 212 | for _, line := range elem.Lines { |
| 213 | printStyled(w, line, true) |
| 214 | fmt.Fprintf(w, "\n") |
| 215 | } |
| 216 | } |
| 217 | |
| 218 | case present.List: |
| 219 | fmt.Fprintf(w, "\n") |
| 220 | for _, item := range elem.Bullet { |
| 221 | fmt.Fprintf(w, " - ") |
| 222 | for i, line := range strings.Split(item, "\n") { |
| 223 | if i > 0 { |
| 224 | fmt.Fprintf(w, " ") |
| 225 | } |
| 226 | printStyled(w, line, false) |
| 227 | fmt.Fprintf(w, "\n") |
| 228 | } |
| 229 | } |
| 230 | |
| 231 | case present.Section: |
| 232 | fmt.Fprintf(w, "\n") |
| 233 | sep := " " |
| 234 | if elem.Title == "" { |
| 235 | sep = "" |
| 236 | } |
| 237 | fmt.Fprintf(w, "%s%s%s\n", strings.Repeat("#", depth+2), sep, markdownEscape(elem.Title, false)) |
| 238 | printSectionBody(file, depth+1, w, elem.Elem) |
| 239 | |
| 240 | case interface{ PresentCmd() string }: |
| 241 | // If there are multiple present commands in a row, don't print a blank line before the second etc. |
| 242 | b := w.Bytes() |
| 243 | sep := "\n" |
| 244 | if len(b) > 0 { |
| 245 | i := bytes.LastIndexByte(b[:len(b)-1], '\n') |
| 246 | if b[i+1] == '.' { |
| 247 | sep = "" |
| 248 | } |
| 249 | } |
| 250 | fmt.Fprintf(w, "%s%s\n", sep, elem.PresentCmd()) |
| 251 | } |
| 252 | } |
| 253 | } |
| 254 | |
// markdownEscape returns s with a backslash inserted before each character
// that is escaped for Markdown output:
//
//   - '#' when it is the first byte of s
//   - every '*' and '_'
//   - '<' when it is neither preceded nor followed by a space and is not
//     the last byte of s
//   - '[' when "](" occurs somewhere at or after it
//
// The startLine parameter is part of the signature used by callers but is
// not consulted by this implementation.
func markdownEscape(s string, startLine bool) string {
	var out strings.Builder
	for pos, ch := range s {
		escape := false
		switch ch {
		case '*', '_':
			escape = true
		case '#':
			escape = pos == 0
		case '<':
			afterNonSpace := pos == 0 || s[pos-1] != ' '
			beforeNonSpace := pos+1 < len(s) && s[pos+1] != ' '
			escape = afterNonSpace && beforeNonSpace
		case '[':
			escape = strings.Contains(s[pos:], "](")
		}
		if escape {
			out.WriteByte('\\')
		}
		out.WriteRune(ch)
	}
	return out.String()
}
| 270 | |
| 271 | // Copy of ../../present/style.go adjusted to produce Markdown instead of HTML. |
| 272 | |
/*
	Fonts are demarcated by an initial and final char bracketing a
	space-delimited word, plus possibly some terminal punctuation.
	The chars are
		_ for italic
		* for bold
		` (back quote) for fixed width.
	Inner appearances of the char become spaces. For instance,
		_this_is_italic_!
	becomes the Markdown
		_this is italic_!
	Bold words are emitted with doubled asterisks, as in **bold**.
*/
| 285 | |
| 286 | func printStyled(w *bytes.Buffer, text string, startLine bool) { |
| 287 | w.WriteString(font(text, startLine)) |
| 288 | } |
| 289 | |
| 290 | // font returns s with font indicators turned into HTML font tags. |
| 291 | func font(s string, startLine bool) string { |
| 292 | if !strings.ContainsAny(s, "[`_*") { |
| 293 | return markdownEscape(s, startLine) |
| 294 | } |
| 295 | words := split(s) |
| 296 | var b bytes.Buffer |
| 297 | Word: |
| 298 | for w, word := range words { |
| 299 | words[w] = markdownEscape(word, startLine && w == 0) // for all the continue Word |
| 300 | if len(word) < 2 { |
| 301 | continue Word |
| 302 | } |
| 303 | if link, _ := parseInlineLink(word); link != "" { |
| 304 | words[w] = link |
| 305 | continue Word |
| 306 | } |
| 307 | const marker = "_*`" |
| 308 | // Initial punctuation is OK but must be peeled off. |
| 309 | first := strings.IndexAny(word, marker) |
| 310 | if first == -1 { |
| 311 | continue Word |
| 312 | } |
| 313 | // Opening marker must be at the beginning of the token or else preceded by punctuation. |
| 314 | if first != 0 { |
| 315 | r, _ := utf8.DecodeLastRuneInString(word[:first]) |
| 316 | if !unicode.IsPunct(r) { |
| 317 | continue Word |
| 318 | } |
| 319 | } |
| 320 | open, word := markdownEscape(word[:first], startLine && w == 0), word[first:] |
| 321 | char := word[0] // ASCII is OK. |
| 322 | close := "" |
| 323 | switch char { |
| 324 | default: |
| 325 | continue Word |
| 326 | case '_': |
| 327 | open += "_" |
| 328 | close = "_" |
| 329 | case '*': |
| 330 | open += "**" |
| 331 | close = "**" |
| 332 | case '`': |
| 333 | open += "`" |
| 334 | close = "`" |
| 335 | } |
| 336 | // Closing marker must be at the end of the token or else followed by punctuation. |
| 337 | last := strings.LastIndex(word, word[:1]) |
| 338 | if last == 0 { |
| 339 | continue Word |
| 340 | } |
| 341 | if last+1 != len(word) { |
| 342 | r, _ := utf8.DecodeRuneInString(word[last+1:]) |
| 343 | if !unicode.IsPunct(r) { |
| 344 | continue Word |
| 345 | } |
| 346 | } |
| 347 | head, tail := word[:last+1], word[last+1:] |
| 348 | b.Reset() |
| 349 | var wid int |
| 350 | for i := 1; i < len(head)-1; i += wid { |
| 351 | var r rune |
| 352 | r, wid = utf8.DecodeRuneInString(head[i:]) |
| 353 | if r != rune(char) { |
| 354 | // Ordinary character. |
| 355 | b.WriteRune(r) |
| 356 | continue |
| 357 | } |
| 358 | if head[i+1] != char { |
| 359 | // Inner char becomes space. |
| 360 | b.WriteRune(' ') |
| 361 | continue |
| 362 | } |
| 363 | // Doubled char becomes real char. |
| 364 | // Not worth worrying about "_x__". |
| 365 | b.WriteByte(char) |
| 366 | wid++ // Consumed two chars, both ASCII. |
| 367 | } |
| 368 | text := b.String() |
| 369 | if close == "`" { |
| 370 | for strings.Contains(text, close) { |
| 371 | open += "`" |
| 372 | close += "`" |
| 373 | } |
| 374 | } else { |
| 375 | text = markdownEscape(text, false) |
| 376 | } |
| 377 | words[w] = open + text + close + tail |
| 378 | } |
| 379 | return strings.Join(words, "") |
| 380 | } |
| 381 | |
| 382 | // split is like strings.Fields but also returns the runs of spaces |
| 383 | // and treats inline links as distinct words. |
| 384 | func split(s string) []string { |
| 385 | var ( |
| 386 | words = make([]string, 0, 10) |
| 387 | start = 0 |
| 388 | ) |
| 389 | |
| 390 | // appendWord appends the string s[start:end] to the words slice. |
| 391 | // If the word contains the beginning of a link, the non-link portion |
| 392 | // of the word and the entire link are appended as separate words, |
| 393 | // and the start index is advanced to the end of the link. |
| 394 | appendWord := func(end int) { |
| 395 | if j := strings.Index(s[start:end], "[["); j > -1 { |
| 396 | if _, l := parseInlineLink(s[start+j:]); l > 0 { |
| 397 | // Append portion before link, if any. |
| 398 | if j > 0 { |
| 399 | words = append(words, s[start:start+j]) |
| 400 | } |
| 401 | // Append link itself. |
| 402 | words = append(words, s[start+j:start+j+l]) |
| 403 | // Advance start index to end of link. |
| 404 | start = start + j + l |
| 405 | return |
| 406 | } |
| 407 | } |
| 408 | // No link; just add the word. |
| 409 | words = append(words, s[start:end]) |
| 410 | start = end |
| 411 | } |
| 412 | |
| 413 | wasSpace := false |
| 414 | for i, r := range s { |
| 415 | isSpace := unicode.IsSpace(r) |
| 416 | if i > start && isSpace != wasSpace { |
| 417 | appendWord(i) |
| 418 | } |
| 419 | wasSpace = isSpace |
| 420 | } |
| 421 | for start < len(s) { |
| 422 | appendWord(len(s)) |
| 423 | } |
| 424 | return words |
| 425 | } |
| 426 | |
| 427 | // parseInlineLink parses an inline link at the start of s, and returns |
| 428 | // a rendered Markdown link and the total length of the raw inline link. |
| 429 | // If no inline link is present, it returns all zeroes. |
| 430 | func parseInlineLink(s string) (link string, length int) { |
| 431 | if !strings.HasPrefix(s, "[[") { |
| 432 | return |
| 433 | } |
| 434 | end := strings.Index(s, "]]") |
| 435 | if end == -1 { |
| 436 | return |
| 437 | } |
| 438 | urlEnd := strings.Index(s, "]") |
| 439 | rawURL := s[2:urlEnd] |
| 440 | const badURLChars = `<>"{}|\^[] ` + "`" // per RFC2396 section 2.4.3 |
| 441 | if strings.ContainsAny(rawURL, badURLChars) { |
| 442 | return |
| 443 | } |
| 444 | if urlEnd == end { |
| 445 | simpleURL := "" |
| 446 | url, err := url.Parse(rawURL) |
| 447 | if err == nil { |
| 448 | // If the URL is http://foo.com, drop the http:// |
| 449 | // In other words, render [[http://golang.org]] as: |
| 450 | // <a href="http://golang.org">golang.org</a> |
| 451 | if strings.HasPrefix(rawURL, url.Scheme+"://") { |
| 452 | simpleURL = strings.TrimPrefix(rawURL, url.Scheme+"://") |
| 453 | } else if strings.HasPrefix(rawURL, url.Scheme+":") { |
| 454 | simpleURL = strings.TrimPrefix(rawURL, url.Scheme+":") |
| 455 | } |
| 456 | } |
| 457 | return renderLink(rawURL, simpleURL), end + 2 |
| 458 | } |
| 459 | if s[urlEnd:urlEnd+2] != "][" { |
| 460 | return |
| 461 | } |
| 462 | text := s[urlEnd+2 : end] |
| 463 | return renderLink(rawURL, text), end + 2 |
| 464 | } |
| 465 | |
| 466 | func renderLink(href, text string) string { |
| 467 | text = font(text, false) |
| 468 | if text == "" { |
| 469 | text = markdownEscape(href, false) |
| 470 | } |
| 471 | return "[" + text + "](" + href + ")" |
| 472 | } |
| 473 |
Members