1 | // Copyright 2020 The Go Authors. All rights reserved. |
---|---|
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | // Present2md converts legacy-syntax present files to Markdown-syntax present files. |
6 | // |
7 | // Usage: |
8 | // |
9 | // present2md [-w] [file ...] |
10 | // |
11 | // By default, present2md prints the Markdown-syntax form of each input file to standard output. |
12 | // If no input file is listed, standard input is used. |
13 | // |
14 | // The -w flag causes present2md to update the files in place, overwriting each with its |
15 | // Markdown-syntax equivalent. |
16 | // |
17 | // Examples |
18 | // |
19 | // present2md your.article |
20 | // present2md -w *.article |
21 | package main |
22 | |
23 | import ( |
24 | "bytes" |
25 | "flag" |
26 | "fmt" |
27 | "io" |
28 | "io/ioutil" |
29 | "log" |
30 | "net/url" |
31 | "os" |
32 | "strings" |
33 | "unicode" |
34 | "unicode/utf8" |
35 | |
36 | "golang.org/x/tools/present" |
37 | ) |
38 | |
// usage writes the one-line command synopsis to standard error and
// terminates the process with exit status 2.
func usage() {
	fmt.Fprintln(os.Stderr, "usage: present2md [-w] [file ...]")
	os.Exit(2)
}
43 | |
// writeBack holds the value of the -w flag. When it is set, main asks
// convert to overwrite each input file instead of printing the result.
var writeBack = flag.Bool("w", false, "write conversions back to original files")

// exitStatus is the code main hands to os.Exit after processing the
// file arguments. It starts at zero and main sets it to 1 when a file
// cannot be opened or converted.
var exitStatus int
48 | |
49 | func main() { |
50 | log.SetPrefix("present2md: ") |
51 | log.SetFlags(0) |
52 | flag.Usage = usage |
53 | flag.Parse() |
54 | |
55 | args := flag.Args() |
56 | if len(args) == 0 { |
57 | if *writeBack { |
58 | log.Fatalf("cannot use -w with standard input") |
59 | } |
60 | convert(os.Stdin, "stdin", false) |
61 | return |
62 | } |
63 | |
64 | for _, arg := range args { |
65 | f, err := os.Open(arg) |
66 | if err != nil { |
67 | log.Print(err) |
68 | exitStatus = 1 |
69 | continue |
70 | } |
71 | err = convert(f, arg, *writeBack) |
72 | f.Close() |
73 | if err != nil { |
74 | log.Print(err) |
75 | exitStatus = 1 |
76 | } |
77 | } |
78 | os.Exit(exitStatus) |
79 | } |
80 | |
81 | // convert reads the data from r, parses it as legacy present, |
82 | // and converts it to Markdown-enabled present. |
83 | // If any errors occur, the data is reported as coming from file. |
84 | // If writeBack is true, the converted version is written back to file. |
85 | // If writeBack is false, the converted version is printed to standard output. |
86 | func convert(r io.Reader, file string, writeBack bool) error { |
87 | data, err := ioutil.ReadAll(r) |
88 | if err != nil { |
89 | return err |
90 | } |
91 | if bytes.HasPrefix(data, []byte("# ")) { |
92 | return fmt.Errorf("%v: already markdown", file) |
93 | } |
94 | |
95 | // Convert all comments before parsing the document. |
96 | // The '//' comment is treated as normal text and so |
97 | // is passed through the translation unaltered. |
98 | data = bytes.Replace(data, []byte("\n#"), []byte("\n//"), -1) |
99 | |
100 | doc, err := present.Parse(bytes.NewReader(data), file, 0) |
101 | if err != nil { |
102 | return err |
103 | } |
104 | |
105 | // Title and Subtitle, Time, Tags. |
106 | var md bytes.Buffer |
107 | fmt.Fprintf(&md, "# %s\n", doc.Title) |
108 | if doc.Subtitle != "" { |
109 | fmt.Fprintf(&md, "%s\n", doc.Subtitle) |
110 | } |
111 | if !doc.Time.IsZero() { |
112 | fmt.Fprintf(&md, "%s\n", doc.Time.Format("2 Jan 2006")) |
113 | } |
114 | if len(doc.Tags) > 0 { |
115 | fmt.Fprintf(&md, "Tags: %s\n", strings.Join(doc.Tags, ", ")) |
116 | } |
117 | |
118 | // Summary, defaulting to first paragraph of section. |
119 | // (Summaries must be explicit for Markdown-enabled present, |
120 | // and the expectation is that they will be shorter than the |
121 | // whole first paragraph. But this is what the blog does today.) |
122 | if strings.HasSuffix(file, ".article") && len(doc.Sections) > 0 { |
123 | for _, elem := range doc.Sections[0].Elem { |
124 | text, ok := elem.(present.Text) |
125 | if !ok || text.Pre { |
126 | // skip everything but non-text elements |
127 | continue |
128 | } |
129 | fmt.Fprintf(&md, "Summary:") |
130 | for i, line := range text.Lines { |
131 | fmt.Fprintf(&md, " ") |
132 | printStyled(&md, line, i == 0) |
133 | } |
134 | fmt.Fprintf(&md, "\n") |
135 | break |
136 | } |
137 | } |
138 | |
139 | // Authors |
140 | for _, a := range doc.Authors { |
141 | fmt.Fprintf(&md, "\n") |
142 | for _, elem := range a.Elem { |
143 | switch elem := elem.(type) { |
144 | default: |
145 | // Can only happen if this type switch is incomplete, which is a bug. |
146 | log.Fatalf("%s: unexpected author type %T", file, elem) |
147 | case present.Text: |
148 | for _, line := range elem.Lines { |
149 | fmt.Fprintf(&md, "%s\n", markdownEscape(line, true)) |
150 | } |
151 | case present.Link: |
152 | fmt.Fprintf(&md, "%s\n", markdownEscape(elem.Label, true)) |
153 | } |
154 | } |
155 | } |
156 | |
157 | // Invariant: the output ends in non-blank line now, |
158 | // and after printing any piece of the file below, |
159 | // the output should still end in a non-blank line. |
160 | // If a blank line separator is needed, it should be printed |
161 | // before the block that needs separating, not after. |
162 | |
163 | if len(doc.TitleNotes) > 0 { |
164 | fmt.Fprintf(&md, "\n") |
165 | for _, line := range doc.TitleNotes { |
166 | fmt.Fprintf(&md, ": %s\n", line) |
167 | } |
168 | } |
169 | |
170 | if len(doc.Sections) == 1 && strings.HasSuffix(file, ".article") { |
171 | // Blog drops section headers when there is only one section. |
172 | // Don't print a title in this case, to make clear that it's being dropped. |
173 | fmt.Fprintf(&md, "\n##\n") |
174 | printSectionBody(file, 1, &md, doc.Sections[0].Elem) |
175 | } else { |
176 | for _, s := range doc.Sections { |
177 | fmt.Fprintf(&md, "\n") |
178 | fmt.Fprintf(&md, "## %s\n", markdownEscape(s.Title, false)) |
179 | printSectionBody(file, 1, &md, s.Elem) |
180 | } |
181 | } |
182 | |
183 | if !writeBack { |
184 | os.Stdout.Write(md.Bytes()) |
185 | return nil |
186 | } |
187 | return ioutil.WriteFile(file, md.Bytes(), 0666) |
188 | } |
189 | |
190 | func printSectionBody(file string, depth int, w *bytes.Buffer, elems []present.Elem) { |
191 | for _, elem := range elems { |
192 | switch elem := elem.(type) { |
193 | default: |
194 | // Can only happen if this type switch is incomplete, which is a bug. |
195 | log.Fatalf("%s: unexpected present element type %T", file, elem) |
196 | |
197 | case present.Text: |
198 | fmt.Fprintf(w, "\n") |
199 | lines := elem.Lines |
200 | for len(lines) > 0 && lines[0] == "" { |
201 | lines = lines[1:] |
202 | } |
203 | if elem.Pre { |
204 | for _, line := range strings.Split(strings.TrimRight(elem.Raw, "\n"), "\n") { |
205 | if line == "" { |
206 | fmt.Fprintf(w, "\n") |
207 | } else { |
208 | fmt.Fprintf(w, "\t%s\n", line) |
209 | } |
210 | } |
211 | } else { |
212 | for _, line := range elem.Lines { |
213 | printStyled(w, line, true) |
214 | fmt.Fprintf(w, "\n") |
215 | } |
216 | } |
217 | |
218 | case present.List: |
219 | fmt.Fprintf(w, "\n") |
220 | for _, item := range elem.Bullet { |
221 | fmt.Fprintf(w, " - ") |
222 | for i, line := range strings.Split(item, "\n") { |
223 | if i > 0 { |
224 | fmt.Fprintf(w, " ") |
225 | } |
226 | printStyled(w, line, false) |
227 | fmt.Fprintf(w, "\n") |
228 | } |
229 | } |
230 | |
231 | case present.Section: |
232 | fmt.Fprintf(w, "\n") |
233 | sep := " " |
234 | if elem.Title == "" { |
235 | sep = "" |
236 | } |
237 | fmt.Fprintf(w, "%s%s%s\n", strings.Repeat("#", depth+2), sep, markdownEscape(elem.Title, false)) |
238 | printSectionBody(file, depth+1, w, elem.Elem) |
239 | |
240 | case interface{ PresentCmd() string }: |
241 | // If there are multiple present commands in a row, don't print a blank line before the second etc. |
242 | b := w.Bytes() |
243 | sep := "\n" |
244 | if len(b) > 0 { |
245 | i := bytes.LastIndexByte(b[:len(b)-1], '\n') |
246 | if b[i+1] == '.' { |
247 | sep = "" |
248 | } |
249 | } |
250 | fmt.Fprintf(w, "%s%s\n", sep, elem.PresentCmd()) |
251 | } |
252 | } |
253 | } |
254 | |
// markdownEscape returns s with a backslash inserted before each
// character that would otherwise be read as Markdown syntax:
//
//   - '#' when it is the first byte of s
//   - every '*' and '_'
//   - '<' when it is neither preceded nor followed by a space and is
//     not the last byte of s
//   - '[' when a "](" sequence occurs at or after it in s
//
// startLine is part of the signature used by callers, but the current
// implementation does not consult it: a leading '#' is escaped whether
// or not s begins a line.
func markdownEscape(s string, startLine bool) string {
	var out strings.Builder
	for pos, ch := range s {
		needsEscape := false
		switch ch {
		case '*', '_':
			needsEscape = true
		case '#':
			needsEscape = pos == 0
		case '<':
			prevNotSpace := pos == 0 || s[pos-1] != ' '
			nextNotSpace := pos+1 < len(s) && s[pos+1] != ' '
			needsEscape = prevNotSpace && nextNotSpace
		case '[':
			needsEscape = strings.Contains(s[pos:], "](")
		}
		if needsEscape {
			out.WriteRune('\\')
		}
		out.WriteRune(ch)
	}
	return out.String()
}
270 | |
271 | // Copy of ../../present/style.go adjusted to produce Markdown instead of HTML. |
272 | |
/*
	Fonts are demarcated by an initial and final char bracketing a
	space-delimited word, plus possibly some terminal punctuation.
	The chars are
		_ for italic
		* for bold
		` (back quote) for fixed width.
	Inner appearances of the char become spaces. For instance,
		_this_is_italic_!
	becomes the Markdown
		_this is italic_!
	Bold text is written with doubled asterisks, so *bold* becomes **bold**.
*/
285 | |
286 | func printStyled(w *bytes.Buffer, text string, startLine bool) { |
287 | w.WriteString(font(text, startLine)) |
288 | } |
289 | |
290 | // font returns s with font indicators turned into HTML font tags. |
291 | func font(s string, startLine bool) string { |
292 | if !strings.ContainsAny(s, "[`_*") { |
293 | return markdownEscape(s, startLine) |
294 | } |
295 | words := split(s) |
296 | var b bytes.Buffer |
297 | Word: |
298 | for w, word := range words { |
299 | words[w] = markdownEscape(word, startLine && w == 0) // for all the continue Word |
300 | if len(word) < 2 { |
301 | continue Word |
302 | } |
303 | if link, _ := parseInlineLink(word); link != "" { |
304 | words[w] = link |
305 | continue Word |
306 | } |
307 | const marker = "_*`" |
308 | // Initial punctuation is OK but must be peeled off. |
309 | first := strings.IndexAny(word, marker) |
310 | if first == -1 { |
311 | continue Word |
312 | } |
313 | // Opening marker must be at the beginning of the token or else preceded by punctuation. |
314 | if first != 0 { |
315 | r, _ := utf8.DecodeLastRuneInString(word[:first]) |
316 | if !unicode.IsPunct(r) { |
317 | continue Word |
318 | } |
319 | } |
320 | open, word := markdownEscape(word[:first], startLine && w == 0), word[first:] |
321 | char := word[0] // ASCII is OK. |
322 | close := "" |
323 | switch char { |
324 | default: |
325 | continue Word |
326 | case '_': |
327 | open += "_" |
328 | close = "_" |
329 | case '*': |
330 | open += "**" |
331 | close = "**" |
332 | case '`': |
333 | open += "`" |
334 | close = "`" |
335 | } |
336 | // Closing marker must be at the end of the token or else followed by punctuation. |
337 | last := strings.LastIndex(word, word[:1]) |
338 | if last == 0 { |
339 | continue Word |
340 | } |
341 | if last+1 != len(word) { |
342 | r, _ := utf8.DecodeRuneInString(word[last+1:]) |
343 | if !unicode.IsPunct(r) { |
344 | continue Word |
345 | } |
346 | } |
347 | head, tail := word[:last+1], word[last+1:] |
348 | b.Reset() |
349 | var wid int |
350 | for i := 1; i < len(head)-1; i += wid { |
351 | var r rune |
352 | r, wid = utf8.DecodeRuneInString(head[i:]) |
353 | if r != rune(char) { |
354 | // Ordinary character. |
355 | b.WriteRune(r) |
356 | continue |
357 | } |
358 | if head[i+1] != char { |
359 | // Inner char becomes space. |
360 | b.WriteRune(' ') |
361 | continue |
362 | } |
363 | // Doubled char becomes real char. |
364 | // Not worth worrying about "_x__". |
365 | b.WriteByte(char) |
366 | wid++ // Consumed two chars, both ASCII. |
367 | } |
368 | text := b.String() |
369 | if close == "`" { |
370 | for strings.Contains(text, close) { |
371 | open += "`" |
372 | close += "`" |
373 | } |
374 | } else { |
375 | text = markdownEscape(text, false) |
376 | } |
377 | words[w] = open + text + close + tail |
378 | } |
379 | return strings.Join(words, "") |
380 | } |
381 | |
382 | // split is like strings.Fields but also returns the runs of spaces |
383 | // and treats inline links as distinct words. |
384 | func split(s string) []string { |
385 | var ( |
386 | words = make([]string, 0, 10) |
387 | start = 0 |
388 | ) |
389 | |
390 | // appendWord appends the string s[start:end] to the words slice. |
391 | // If the word contains the beginning of a link, the non-link portion |
392 | // of the word and the entire link are appended as separate words, |
393 | // and the start index is advanced to the end of the link. |
394 | appendWord := func(end int) { |
395 | if j := strings.Index(s[start:end], "[["); j > -1 { |
396 | if _, l := parseInlineLink(s[start+j:]); l > 0 { |
397 | // Append portion before link, if any. |
398 | if j > 0 { |
399 | words = append(words, s[start:start+j]) |
400 | } |
401 | // Append link itself. |
402 | words = append(words, s[start+j:start+j+l]) |
403 | // Advance start index to end of link. |
404 | start = start + j + l |
405 | return |
406 | } |
407 | } |
408 | // No link; just add the word. |
409 | words = append(words, s[start:end]) |
410 | start = end |
411 | } |
412 | |
413 | wasSpace := false |
414 | for i, r := range s { |
415 | isSpace := unicode.IsSpace(r) |
416 | if i > start && isSpace != wasSpace { |
417 | appendWord(i) |
418 | } |
419 | wasSpace = isSpace |
420 | } |
421 | for start < len(s) { |
422 | appendWord(len(s)) |
423 | } |
424 | return words |
425 | } |
426 | |
427 | // parseInlineLink parses an inline link at the start of s, and returns |
428 | // a rendered Markdown link and the total length of the raw inline link. |
429 | // If no inline link is present, it returns all zeroes. |
430 | func parseInlineLink(s string) (link string, length int) { |
431 | if !strings.HasPrefix(s, "[[") { |
432 | return |
433 | } |
434 | end := strings.Index(s, "]]") |
435 | if end == -1 { |
436 | return |
437 | } |
438 | urlEnd := strings.Index(s, "]") |
439 | rawURL := s[2:urlEnd] |
440 | const badURLChars = `<>"{}|\^[] ` + "`" // per RFC2396 section 2.4.3 |
441 | if strings.ContainsAny(rawURL, badURLChars) { |
442 | return |
443 | } |
444 | if urlEnd == end { |
445 | simpleURL := "" |
446 | url, err := url.Parse(rawURL) |
447 | if err == nil { |
448 | // If the URL is http://foo.com, drop the http:// |
449 | // In other words, render [[http://golang.org]] as: |
450 | // <a href="http://golang.org">golang.org</a> |
451 | if strings.HasPrefix(rawURL, url.Scheme+"://") { |
452 | simpleURL = strings.TrimPrefix(rawURL, url.Scheme+"://") |
453 | } else if strings.HasPrefix(rawURL, url.Scheme+":") { |
454 | simpleURL = strings.TrimPrefix(rawURL, url.Scheme+":") |
455 | } |
456 | } |
457 | return renderLink(rawURL, simpleURL), end + 2 |
458 | } |
459 | if s[urlEnd:urlEnd+2] != "][" { |
460 | return |
461 | } |
462 | text := s[urlEnd+2 : end] |
463 | return renderLink(rawURL, text), end + 2 |
464 | } |
465 | |
466 | func renderLink(href, text string) string { |
467 | text = font(text, false) |
468 | if text == "" { |
469 | text = markdownEscape(href, false) |
470 | } |
471 | return "[" + text + "](" + href + ")" |
472 | } |
473 |
Members