gopls/cmd/present2md/main.go

1	// Copyright 2020 The Go Authors. All rights reserved.
2	// Use of this source code is governed by a BSD-style
3	// license that can be found in the LICENSE file.
4
5	// Present2md converts legacy-syntax present files to Markdown-syntax present files.
6	//
7	// Usage:
8	//
9	// present2md [-w] [file ...]
10	//
11	// By default, present2md prints the Markdown-syntax form of each input file to standard output.
12	// If no input file is listed, standard input is used.
13	//
14	// The -w flag causes present2md to update the files in place, overwriting each with its
15	// Markdown-syntax equivalent.
16	//
17	// Examples
18	//
19	// present2md your.article
20	// present2md -w *.article
21	package main
22
23	import (
24	"bytes"
25	"flag"
26	"fmt"
27	"io"
28	"io/ioutil"
29	"log"
30	"net/url"
31	"os"
32	"strings"
33	"unicode"
34	"unicode/utf8"
35
36	"golang.org/x/tools/present"
37	)
38
// usage writes the one-line command synopsis to standard error and
// terminates the process with exit status 2.
func usage() {
	const synopsis = "usage: present2md [-w] [file ...]\n"
	os.Stderr.WriteString(synopsis)
	os.Exit(2)
}
43
// writeBack is the -w flag: when set, each converted file overwrites
// its original instead of being printed to standard output.
var writeBack = flag.Bool("w", false, "write conversions back to original files")

// exitStatus is the process exit code; main sets it to 1 when any
// input file fails to open or convert.
var exitStatus = 0
48
49	func main() {
50	log.SetPrefix("present2md: ")
51	log.SetFlags(0)
52	flag.Usage = usage
53	flag.Parse()
54
55	args := flag.Args()
56	if len(args) == 0 {
57	if *writeBack {
58	log.Fatalf("cannot use -w with standard input")
59	}
60	convert(os.Stdin, "stdin", false)
61	return
62	}
63
64	for _, arg := range args {
65	f, err := os.Open(arg)
66	if err != nil {
67	log.Print(err)
68	exitStatus = 1
69	continue
70	}
71	err = convert(f, arg, *writeBack)
72	f.Close()
73	if err != nil {
74	log.Print(err)
75	exitStatus = 1
76	}
77	}
78	os.Exit(exitStatus)
79	}
80
81	// convert reads the data from r, parses it as legacy present,
82	// and converts it to Markdown-enabled present.
83	// If any errors occur, the data is reported as coming from file.
84	// If writeBack is true, the converted version is written back to file.
85	// If writeBack is false, the converted version is printed to standard output.
86	func convert(r io.Reader, file string, writeBack bool) error {
87	data, err := ioutil.ReadAll(r)
88	if err != nil {
89	return err
90	}
91	if bytes.HasPrefix(data, []byte("# ")) {
92	return fmt.Errorf("%v: already markdown", file)
93	}
94
95	// Convert all comments before parsing the document.
96	// The '//' comment is treated as normal text and so
97	// is passed through the translation unaltered.
98	data = bytes.Replace(data, []byte("\n#"), []byte("\n//"), -1)
99
100	doc, err := present.Parse(bytes.NewReader(data), file, 0)
101	if err != nil {
102	return err
103	}
104
105	// Title and Subtitle, Time, Tags.
106	var md bytes.Buffer
107	fmt.Fprintf(&md, "# %s\n", doc.Title)
108	if doc.Subtitle != "" {
109	fmt.Fprintf(&md, "%s\n", doc.Subtitle)
110	}
111	if !doc.Time.IsZero() {
112	fmt.Fprintf(&md, "%s\n", doc.Time.Format("2 Jan 2006"))
113	}
114	if len(doc.Tags) > 0 {
115	fmt.Fprintf(&md, "Tags: %s\n", strings.Join(doc.Tags, ", "))
116	}
117
118	// Summary, defaulting to first paragraph of section.
119	// (Summaries must be explicit for Markdown-enabled present,
120	// and the expectation is that they will be shorter than the
121	// whole first paragraph. But this is what the blog does today.)
122	if strings.HasSuffix(file, ".article") && len(doc.Sections) > 0 {
123	for _, elem := range doc.Sections[0].Elem {
124	text, ok := elem.(present.Text)
125	if !ok \|\| text.Pre {
126	// skip everything but non-text elements
127	continue
128	}
129	fmt.Fprintf(&md, "Summary:")
130	for i, line := range text.Lines {
131	fmt.Fprintf(&md, " ")
132	printStyled(&md, line, i == 0)
133	}
134	fmt.Fprintf(&md, "\n")
135	break
136	}
137	}
138
139	// Authors
140	for _, a := range doc.Authors {
141	fmt.Fprintf(&md, "\n")
142	for _, elem := range a.Elem {
143	switch elem := elem.(type) {
144	default:
145	// Can only happen if this type switch is incomplete, which is a bug.
146	log.Fatalf("%s: unexpected author type %T", file, elem)
147	case present.Text:
148	for _, line := range elem.Lines {
149	fmt.Fprintf(&md, "%s\n", markdownEscape(line, true))
150	}
151	case present.Link:
152	fmt.Fprintf(&md, "%s\n", markdownEscape(elem.Label, true))
153	}
154	}
155	}
156
157	// Invariant: the output ends in non-blank line now,
158	// and after printing any piece of the file below,
159	// the output should still end in a non-blank line.
160	// If a blank line separator is needed, it should be printed
161	// before the block that needs separating, not after.
162
163	if len(doc.TitleNotes) > 0 {
164	fmt.Fprintf(&md, "\n")
165	for _, line := range doc.TitleNotes {
166	fmt.Fprintf(&md, ": %s\n", line)
167	}
168	}
169
170	if len(doc.Sections) == 1 && strings.HasSuffix(file, ".article") {
171	// Blog drops section headers when there is only one section.
172	// Don't print a title in this case, to make clear that it's being dropped.
173	fmt.Fprintf(&md, "\n##\n")
174	printSectionBody(file, 1, &md, doc.Sections[0].Elem)
175	} else {
176	for _, s := range doc.Sections {
177	fmt.Fprintf(&md, "\n")
178	fmt.Fprintf(&md, "## %s\n", markdownEscape(s.Title, false))
179	printSectionBody(file, 1, &md, s.Elem)
180	}
181	}
182
183	if !writeBack {
184	os.Stdout.Write(md.Bytes())
185	return nil
186	}
187	return ioutil.WriteFile(file, md.Bytes(), 0666)
188	}
189
190	func printSectionBody(file string, depth int, w *bytes.Buffer, elems []present.Elem) {
191	for _, elem := range elems {
192	switch elem := elem.(type) {
193	default:
194	// Can only happen if this type switch is incomplete, which is a bug.
195	log.Fatalf("%s: unexpected present element type %T", file, elem)
196
197	case present.Text:
198	fmt.Fprintf(w, "\n")
199	lines := elem.Lines
200	for len(lines) > 0 && lines[0] == "" {
201	lines = lines[1:]
202	}
203	if elem.Pre {
204	for _, line := range strings.Split(strings.TrimRight(elem.Raw, "\n"), "\n") {
205	if line == "" {
206	fmt.Fprintf(w, "\n")
207	} else {
208	fmt.Fprintf(w, "\t%s\n", line)
209	}
210	}
211	} else {
212	for _, line := range elem.Lines {
213	printStyled(w, line, true)
214	fmt.Fprintf(w, "\n")
215	}
216	}
217
218	case present.List:
219	fmt.Fprintf(w, "\n")
220	for _, item := range elem.Bullet {
221	fmt.Fprintf(w, " - ")
222	for i, line := range strings.Split(item, "\n") {
223	if i > 0 {
224	fmt.Fprintf(w, " ")
225	}
226	printStyled(w, line, false)
227	fmt.Fprintf(w, "\n")
228	}
229	}
230
231	case present.Section:
232	fmt.Fprintf(w, "\n")
233	sep := " "
234	if elem.Title == "" {
235	sep = ""
236	}
237	fmt.Fprintf(w, "%s%s%s\n", strings.Repeat("#", depth+2), sep, markdownEscape(elem.Title, false))
238	printSectionBody(file, depth+1, w, elem.Elem)
239
240	case interface{ PresentCmd() string }:
241	// If there are multiple present commands in a row, don't print a blank line before the second etc.
242	b := w.Bytes()
243	sep := "\n"
244	if len(b) > 0 {
245	i := bytes.LastIndexByte(b[:len(b)-1], '\n')
246	if b[i+1] == '.' {
247	sep = ""
248	}
249	}
250	fmt.Fprintf(w, "%s%s\n", sep, elem.PresentCmd())
251	}
252	}
253	}
254
// markdownEscape returns s with a backslash inserted before each character
// that this tool treats as Markdown syntax:
//
//   - '#' when it is the first byte of s
//   - every '*' and '_'
//   - '<' that is not preceded by a space and is followed by a non-space byte
//   - '[' when "](" occurs somewhere at or after it
//
// startLine is accepted for callers' convenience but is not consulted:
// a leading '#' is escaped whether or not s begins an output line.
func markdownEscape(s string, startLine bool) string {
	// needsEscape reports whether the rune r found at byte offset i of s
	// has to be preceded by a backslash.
	needsEscape := func(i int, r rune) bool {
		switch r {
		case '*', '_':
			return true
		case '#':
			return i == 0
		case '<':
			if i > 0 && s[i-1] == ' ' {
				return false
			}
			return i+1 < len(s) && s[i+1] != ' '
		case '[':
			return strings.Contains(s[i:], "](")
		}
		return false
	}

	var out strings.Builder
	for i, r := range s {
		if needsEscape(i, r) {
			out.WriteByte('\\')
		}
		out.WriteRune(r)
	}
	return out.String()
}
270
271	// Copy of ../../present/style.go adjusted to produce Markdown instead of HTML.
272
273	/*
274	Fonts are demarcated by an initial and final char bracketing a
275	space-delimited word, plus possibly some terminal punctuation.
276	The chars are
277	_ for italic
278	* for bold
279	` (back quote) for fixed width.
280	Inner appearances of the char become spaces. For instance,
281	_this_is_italic_!
282	becomes
283	_this is italic_!
284	*/
285
286	func printStyled(w *bytes.Buffer, text string, startLine bool) {
287	w.WriteString(font(text, startLine))
288	}
289
290	// font returns s with font indicators turned into HTML font tags.
291	func font(s string, startLine bool) string {
292	if !strings.ContainsAny(s, "[`_*") {
293	return markdownEscape(s, startLine)
294	}
295	words := split(s)
296	var b bytes.Buffer
297	Word:
298	for w, word := range words {
299	words[w] = markdownEscape(word, startLine && w == 0) // for all the continue Word
300	if len(word) < 2 {
301	continue Word
302	}
303	if link, _ := parseInlineLink(word); link != "" {
304	words[w] = link
305	continue Word
306	}
307	const marker = "_*`"
308	// Initial punctuation is OK but must be peeled off.
309	first := strings.IndexAny(word, marker)
310	if first == -1 {
311	continue Word
312	}
313	// Opening marker must be at the beginning of the token or else preceded by punctuation.
314	if first != 0 {
315	r, _ := utf8.DecodeLastRuneInString(word[:first])
316	if !unicode.IsPunct(r) {
317	continue Word
318	}
319	}
320	open, word := markdownEscape(word[:first], startLine && w == 0), word[first:]
321	char := word[0] // ASCII is OK.
322	close := ""
323	switch char {
324	default:
325	continue Word
326	case '_':
327	open += "_"
328	close = "_"
329	case '*':
330	open += "**"
331	close = "**"
332	case '`':
333	open += "`"
334	close = "`"
335	}
336	// Closing marker must be at the end of the token or else followed by punctuation.
337	last := strings.LastIndex(word, word[:1])
338	if last == 0 {
339	continue Word
340	}
341	if last+1 != len(word) {
342	r, _ := utf8.DecodeRuneInString(word[last+1:])
343	if !unicode.IsPunct(r) {
344	continue Word
345	}
346	}
347	head, tail := word[:last+1], word[last+1:]
348	b.Reset()
349	var wid int
350	for i := 1; i < len(head)-1; i += wid {
351	var r rune
352	r, wid = utf8.DecodeRuneInString(head[i:])
353	if r != rune(char) {
354	// Ordinary character.
355	b.WriteRune(r)
356	continue
357	}
358	if head[i+1] != char {
359	// Inner char becomes space.
360	b.WriteRune(' ')
361	continue
362	}
363	// Doubled char becomes real char.
364	// Not worth worrying about "_x__".
365	b.WriteByte(char)
366	wid++ // Consumed two chars, both ASCII.
367	}
368	text := b.String()
369	if close == "`" {
370	for strings.Contains(text, close) {
371	open += "`"
372	close += "`"
373	}
374	} else {
375	text = markdownEscape(text, false)
376	}
377	words[w] = open + text + close + tail
378	}
379	return strings.Join(words, "")
380	}
381
382	// split is like strings.Fields but also returns the runs of spaces
383	// and treats inline links as distinct words.
384	func split(s string) []string {
385	var (
386	words = make([]string, 0, 10)
387	start = 0
388	)
389
390	// appendWord appends the string s[start:end] to the words slice.
391	// If the word contains the beginning of a link, the non-link portion
392	// of the word and the entire link are appended as separate words,
393	// and the start index is advanced to the end of the link.
394	appendWord := func(end int) {
395	if j := strings.Index(s[start:end], "[["); j > -1 {
396	if _, l := parseInlineLink(s[start+j:]); l > 0 {
397	// Append portion before link, if any.
398	if j > 0 {
399	words = append(words, s[start:start+j])
400	}
401	// Append link itself.
402	words = append(words, s[start+j:start+j+l])
403	// Advance start index to end of link.
404	start = start + j + l
405	return
406	}
407	}
408	// No link; just add the word.
409	words = append(words, s[start:end])
410	start = end
411	}
412
413	wasSpace := false
414	for i, r := range s {
415	isSpace := unicode.IsSpace(r)
416	if i > start && isSpace != wasSpace {
417	appendWord(i)
418	}
419	wasSpace = isSpace
420	}
421	for start < len(s) {
422	appendWord(len(s))
423	}
424	return words
425	}
426
427	// parseInlineLink parses an inline link at the start of s, and returns
428	// a rendered Markdown link and the total length of the raw inline link.
429	// If no inline link is present, it returns all zeroes.
430	func parseInlineLink(s string) (link string, length int) {
431	if !strings.HasPrefix(s, "[[") {
432	return
433	}
434	end := strings.Index(s, "]]")
435	if end == -1 {
436	return
437	}
438	urlEnd := strings.Index(s, "]")
439	rawURL := s[2:urlEnd]
440	const badURLChars = `<>"{}\|\^[] ` + "`" // per RFC2396 section 2.4.3
441	if strings.ContainsAny(rawURL, badURLChars) {
442	return
443	}
444	if urlEnd == end {
445	simpleURL := ""
446	url, err := url.Parse(rawURL)
447	if err == nil {
448	// If the URL is http://foo.com, drop the http://
449	// In other words, render [[http://golang.org]] as:
450	// <a href="http://golang.org">golang.org</a>
451	if strings.HasPrefix(rawURL, url.Scheme+"://") {
452	simpleURL = strings.TrimPrefix(rawURL, url.Scheme+"://")
453	} else if strings.HasPrefix(rawURL, url.Scheme+":") {
454	simpleURL = strings.TrimPrefix(rawURL, url.Scheme+":")
455	}
456	}
457	return renderLink(rawURL, simpleURL), end + 2
458	}
459	if s[urlEnd:urlEnd+2] != "][" {
460	return
461	}
462	text := s[urlEnd+2 : end]
463	return renderLink(rawURL, text), end + 2
464	}
465
466	func renderLink(href, text string) string {
467	text = font(text, false)
468	if text == "" {
469	text = markdownEscape(href, false)
470	}
471	return "[" + text + "](" + href + ")"
472	}
473