gopls/godoc/index.go

1	// Copyright 2009 The Go Authors. All rights reserved.
2	// Use of this source code is governed by a BSD-style
3	// license that can be found in the LICENSE file.
4
5	// This file contains the infrastructure to create an
6	// identifier and full-text index for a set of Go files.
7	//
8	// Algorithm for identifier index:
9	// - traverse all .go files of the file tree specified by root
10	// - for each identifier (word) encountered, collect all occurrences (spots)
11	// into a list; this produces a list of spots for each word
12	// - reduce the lists: from a list of spots to a list of FileRuns,
13	// and from a list of FileRuns into a list of PakRuns
14	// - make a HitList from the PakRuns
15	//
16	// Details:
17	// - keep two lists per word: one containing package-level declarations
18	// that have snippets, and one containing all other spots
19	// - keep the snippets in a separate table indexed by snippet index
20	// and store the snippet index in place of the line number in a SpotInfo
21	// (the line number for spots with snippets is stored in the snippet)
22	// - at the end, create lists of alternative spellings for a given
23	// word
24	//
25	// Algorithm for full text index:
26	// - concatenate all source code in a byte buffer (in memory)
27	// - add the files to a file set in lockstep as they are added to the byte
28	// buffer such that a byte buffer offset corresponds to the Pos value for
29	// that file location
30	// - create a suffix array from the concatenated sources
31	//
32	// String lookup in full text index:
33	// - use the suffix array to lookup a string's offsets - the offsets
34	// correspond to the Pos values relative to the file set
35	// - translate the Pos values back into file and line information and
36	// sort the result
37
38	package godoc
39
40	import (
41	"bufio"
42	"bytes"
43	"encoding/gob"
44	"errors"
45	"fmt"
46	"go/ast"
47	"go/doc"
48	"go/parser"
49	"go/token"
50	"index/suffixarray"
51	"io"
52	"log"
53	"math"
54	"os"
55	pathpkg "path"
56	"path/filepath"
57	"regexp"
58	"runtime"
59	"sort"
60	"strconv"
61	"strings"
62	"sync"
63	"time"
64	"unicode"
65
66	"golang.org/x/tools/godoc/util"
67	"golang.org/x/tools/godoc/vfs"
68	)
69
70	// ----------------------------------------------------------------------------
71	// InterfaceSlice is a helper type for sorting interface
72	// slices according to some slice-specific sort criteria.
73
74	type comparer func(x, y interface{}) bool
75
76	type interfaceSlice struct {
77	slice []interface{}
78	less comparer
79	}
80
81	// ----------------------------------------------------------------------------
82	// RunList
83
84	// A RunList is a list of entries that can be sorted according to some
85	// criteria. A RunList may be compressed by grouping "runs" of entries
86	// which are equal (according to the sort criteria) into a new RunList of
87	// runs. For instance, a RunList containing pairs (x, y) may be compressed
88	// into a RunList containing pair runs (x, {y}) where each run consists of
89	// a list of y's with the same x.
90	type RunList []interface{}
91
92	func (h RunList) sort(less comparer) {
93	sort.Sort(&interfaceSlice{h, less})
94	}
95
96	func (p *interfaceSlice) Len() int { return len(p.slice) }
97	func (p *interfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) }
98	func (p *interfaceSlice) Swap(i, j int) { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] }
99
100	// Compress entries which are the same according to a sort criteria
101	// (specified by less) into "runs".
102	func (h RunList) reduce(less comparer, newRun func(h RunList) interface{}) RunList {
103	if len(h) == 0 {
104	return nil
105	}
106	// len(h) > 0
107
108	// create runs of entries with equal values
109	h.sort(less)
110
111	// for each run, make a new run object and collect them in a new RunList
112	var hh RunList
113	i, x := 0, h[0]
114	for j, y := range h {
115	if less(x, y) {
116	hh = append(hh, newRun(h[i:j]))
117	i, x = j, h[j] // start a new run
118	}
119	}
120	// add final run, if any
121	if i < len(h) {
122	hh = append(hh, newRun(h[i:]))
123	}
124
125	return hh
126	}
127
128	// ----------------------------------------------------------------------------
129	// KindRun
130
131	// Debugging support. Disable to see multiple entries per line.
132	const removeDuplicates = true
133
134	// A KindRun is a run of SpotInfos of the same kind in a given file.
135	// The kind (3 bits) is stored in each SpotInfo element; to find the
136	// kind of a KindRun, look at any of its elements.
137	type KindRun []SpotInfo
138
139	// KindRuns are sorted by line number or index. Since the isIndex bit
140	// is always the same for all infos in one list we can compare lori's.
141	func (k KindRun) Len() int { return len(k) }
142	func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() }
143	func (k KindRun) Swap(i, j int) { k[i], k[j] = k[j], k[i] }
144
145	// FileRun contents are sorted by Kind for the reduction into KindRuns.
146	func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() }
147
148	// newKindRun allocates a new KindRun from the SpotInfo run h.
149	func newKindRun(h RunList) interface{} {
150	run := make(KindRun, len(h))
151	for i, x := range h {
152	run[i] = x.(SpotInfo)
153	}
154
155	// Spots were sorted by file and kind to create this run.
156	// Within this run, sort them by line number or index.
157	sort.Sort(run)
158
159	if removeDuplicates {
160	// Since both the lori and kind field must be
161	// same for duplicates, and since the isIndex
162	// bit is always the same for all infos in one
163	// list we can simply compare the entire info.
164	k := 0
165	prev := SpotInfo(math.MaxUint32) // an unlikely value
166	for _, x := range run {
167	if x != prev {
168	run[k] = x
169	k++
170	prev = x
171	}
172	}
173	run = run[0:k]
174	}
175
176	return run
177	}
178
179	// ----------------------------------------------------------------------------
180	// FileRun
181
182	// A Pak describes a Go package.
183	type Pak struct {
184	Path string // path of directory containing the package
185	Name string // package name as declared by package clause
186	}
187
188	// Paks are sorted by name (primary key) and by import path (secondary key).
189	func (p Pak) less(q Pak) bool {
190	return p.Name < q.Name \|\| p.Name == q.Name && p.Path < q.Path
191	}
192
193	// A File describes a Go file.
194	type File struct {
195	Name string // directory-local file name
196	Pak *Pak // the package to which the file belongs
197	}
198
199	// Path returns the file path of f.
200	func (f *File) Path() string {
201	return pathpkg.Join(f.Pak.Path, f.Name)
202	}
203
204	// A Spot describes a single occurrence of a word.
205	type Spot struct {
206	File *File
207	Info SpotInfo
208	}
209
210	// A FileRun is a list of KindRuns belonging to the same file.
211	type FileRun struct {
212	File *File
213	Groups []KindRun
214	}
215
216	// Spots are sorted by file path for the reduction into FileRuns.
217	func lessSpot(x, y interface{}) bool {
218	fx := x.(Spot).File
219	fy := y.(Spot).File
220	// same as "return fx.Path() < fy.Path()" but w/o computing the file path first
221	px := fx.Pak.Path
222	py := fy.Pak.Path
223	return px < py \|\| px == py && fx.Name < fy.Name
224	}
225
226	// newFileRun allocates a new FileRun from the Spot run h.
227	func newFileRun(h RunList) interface{} {
228	file := h[0].(Spot).File
229
230	// reduce the list of Spots into a list of KindRuns
231	h1 := make(RunList, len(h))
232	for i, x := range h {
233	h1[i] = x.(Spot).Info
234	}
235	h2 := h1.reduce(lessKind, newKindRun)
236
237	// create the FileRun
238	groups := make([]KindRun, len(h2))
239	for i, x := range h2 {
240	groups[i] = x.(KindRun)
241	}
242	return &FileRun{file, groups}
243	}
244
245	// ----------------------------------------------------------------------------
246	// PakRun
247
248	// A PakRun describes a run of *FileRuns of a package.
249	type PakRun struct {
250	Pak *Pak
251	Files []*FileRun
252	}
253
254	// Sorting support for files within a PakRun.
255	func (p *PakRun) Len() int { return len(p.Files) }
256	func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name }
257	func (p *PakRun) Swap(i, j int) { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }
258
259	// FileRuns are sorted by package for the reduction into PakRuns.
260	func lessFileRun(x, y interface{}) bool {
261	return x.(FileRun).File.Pak.less(y.(FileRun).File.Pak)
262	}
263
264	// newPakRun allocates a new PakRun from the *FileRun run h.
265	func newPakRun(h RunList) interface{} {
266	pak := h[0].(*FileRun).File.Pak
267	files := make([]*FileRun, len(h))
268	for i, x := range h {
269	files[i] = x.(*FileRun)
270	}
271	run := &PakRun{pak, files}
272	sort.Sort(run) // files were sorted by package; sort them by file now
273	return run
274	}
275
276	// ----------------------------------------------------------------------------
277	// HitList
278
279	// A HitList describes a list of PakRuns.
280	type HitList []*PakRun
281
282	// PakRuns are sorted by package.
283	func lessPakRun(x, y interface{}) bool { return x.(PakRun).Pak.less(y.(PakRun).Pak) }
284
285	func reduce(h0 RunList) HitList {
286	// reduce a list of Spots into a list of FileRuns
287	h1 := h0.reduce(lessSpot, newFileRun)
288	// reduce a list of FileRuns into a list of PakRuns
289	h2 := h1.reduce(lessFileRun, newPakRun)
290	// sort the list of PakRuns by package
291	h2.sort(lessPakRun)
292	// create a HitList
293	h := make(HitList, len(h2))
294	for i, p := range h2 {
295	h[i] = p.(*PakRun)
296	}
297	return h
298	}
299
300	// filter returns a new HitList created by filtering
301	// all PakRuns from h that have a matching pakname.
302	func (h HitList) filter(pakname string) HitList {
303	var hh HitList
304	for _, p := range h {
305	if p.Pak.Name == pakname {
306	hh = append(hh, p)
307	}
308	}
309	return hh
310	}
311
312	// ----------------------------------------------------------------------------
313	// AltWords
314
315	type wordPair struct {
316	canon string // canonical word spelling (all lowercase)
317	alt string // alternative spelling
318	}
319
320	// An AltWords describes a list of alternative spellings for a
321	// canonical (all lowercase) spelling of a word.
322	type AltWords struct {
323	Canon string // canonical word spelling (all lowercase)
324	Alts []string // alternative spelling for the same word
325	}
326
327	// wordPairs are sorted by their canonical spelling.
328	func lessWordPair(x, y interface{}) bool { return x.(wordPair).canon < y.(wordPair).canon }
329
330	// newAltWords allocates a new AltWords from the *wordPair run h.
331	func newAltWords(h RunList) interface{} {
332	canon := h[0].(*wordPair).canon
333	alts := make([]string, len(h))
334	for i, x := range h {
335	alts[i] = x.(*wordPair).alt
336	}
337	return &AltWords{canon, alts}
338	}
339
340	func (a AltWords) filter(s string) AltWords {
341	var alts []string
342	for _, w := range a.Alts {
343	if w != s {
344	alts = append(alts, w)
345	}
346	}
347	if len(alts) > 0 {
348	return &AltWords{a.Canon, alts}
349	}
350	return nil
351	}
352
353	// Ident stores information about external identifiers in order to create
354	// links to package documentation.
355	type Ident struct {
356	Path string // e.g. "net/http"
357	Package string // e.g. "http"
358	Name string // e.g. "NewRequest"
359	Doc string // e.g. "NewRequest returns a new Request..."
360	}
361
362	// byImportCount sorts the given slice of Idents by the import
363	// counts of the packages to which they belong.
364	type byImportCount struct {
365	Idents []Ident
366	ImportCount map[string]int
367	}
368
369	func (ic byImportCount) Len() int {
370	return len(ic.Idents)
371	}
372
373	func (ic byImportCount) Less(i, j int) bool {
374	ri := ic.ImportCount[ic.Idents[i].Path]
375	rj := ic.ImportCount[ic.Idents[j].Path]
376	if ri == rj {
377	return ic.Idents[i].Path < ic.Idents[j].Path
378	}
379	return ri > rj
380	}
381
382	func (ic byImportCount) Swap(i, j int) {
383	ic.Idents[i], ic.Idents[j] = ic.Idents[j], ic.Idents[i]
384	}
385
386	func (ic byImportCount) String() string {
387	buf := bytes.NewBuffer([]byte("["))
388	for _, v := range ic.Idents {
389	buf.WriteString(fmt.Sprintf("\n\t%s, %s (%d)", v.Path, v.Name, ic.ImportCount[v.Path]))
390	}
391	buf.WriteString("\n]")
392	return buf.String()
393	}
394
395	// filter creates a new Ident list where the results match the given
396	// package name.
397	func (ic byImportCount) filter(pakname string) []Ident {
398	if ic.Idents == nil {
399	return nil
400	}
401	var res []Ident
402	for _, i := range ic.Idents {
403	if i.Package == pakname {
404	res = append(res, i)
405	}
406	}
407	return res
408	}
409
410	// top returns the top n identifiers.
411	func (ic byImportCount) top(n int) []Ident {
412	if len(ic.Idents) > n {
413	return ic.Idents[:n]
414	}
415	return ic.Idents
416	}
417
418	// ----------------------------------------------------------------------------
419	// Indexer
420
421	type IndexResult struct {
422	Decls RunList // package-level declarations (with snippets)
423	Others RunList // all other occurrences
424	}
425
426	// Statistics provides statistics information for an index.
427	type Statistics struct {
428	Bytes int // total size of indexed source files
429	Files int // number of indexed source files
430	Lines int // number of lines (all files)
431	Words int // number of different identifiers
432	Spots int // number of identifier occurrences
433	}
434
435	// An Indexer maintains the data structures and provides the machinery
436	// for indexing .go files under a file tree. It implements the path.Visitor
437	// interface for walking file trees, and the ast.Visitor interface for
438	// walking Go ASTs.
439	type Indexer struct {
440	c *Corpus
441	fset *token.FileSet // file set for all indexed files
442	fsOpenGate chan bool // send pre fs.Open; receive on close
443
444	mu sync.Mutex // guards all the following
445	sources bytes.Buffer // concatenated sources
446	strings map[string]string // interned string
447	packages map[Pak]Pak // interned Paks
448	words map[string]*IndexResult // RunLists of Spots
449	snippets []*Snippet // indices are stored in SpotInfos
450	current *token.File // last file added to file set
451	file *File // AST for current file
452	decl ast.Decl // AST for current decl
453	stats Statistics
454	throttle *util.Throttle
455	importCount map[string]int // package path ("net/http") => count
456	packagePath map[string]map[string]bool // "template" => "text/template" => true
457	exports map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
458	curPkgExports map[string]SpotKind
459	idents map[SpotKind]map[string][]Ident // kind => name => list of Idents
460	}
461
462	func (x *Indexer) intern(s string) string {
463	if s, ok := x.strings[s]; ok {
464	return s
465	}
466	x.strings[s] = s
467	return s
468	}
469
470	func (x Indexer) lookupPackage(path, name string) Pak {
471	// In the source directory tree, more than one package may
472	// live in the same directory. For the packages map, construct
473	// a key that includes both the directory path and the package
474	// name.
475	key := Pak{Path: x.intern(path), Name: x.intern(name)}
476	pak := x.packages[key]
477	if pak == nil {
478	pak = &key
479	x.packages[key] = pak
480	}
481	return pak
482	}
483
484	func (x Indexer) addSnippet(s Snippet) int {
485	index := len(x.snippets)
486	x.snippets = append(x.snippets, s)
487	return index
488	}
489
490	func (x Indexer) visitIdent(kind SpotKind, id ast.Ident) {
491	if id == nil {
492	return
493	}
494	name := x.intern(id.Name)
495
496	switch kind {
497	case TypeDecl, FuncDecl, ConstDecl, VarDecl:
498	x.curPkgExports[name] = kind
499	}
500
501	lists, found := x.words[name]
502	if !found {
503	lists = new(IndexResult)
504	x.words[name] = lists
505	}
506
507	if kind == Use \|\| x.decl == nil {
508	if x.c.IndexGoCode {
509	// not a declaration or no snippet required
510	info := makeSpotInfo(kind, x.current.Line(id.Pos()), false)
511	lists.Others = append(lists.Others, Spot{x.file, info})
512	}
513	} else {
514	// a declaration with snippet
515	index := x.addSnippet(NewSnippet(x.fset, x.decl, id))
516	info := makeSpotInfo(kind, index, true)
517	lists.Decls = append(lists.Decls, Spot{x.file, info})
518	}
519
520	x.stats.Spots++
521	}
522
523	func (x Indexer) visitFieldList(kind SpotKind, flist ast.FieldList) {
524	for _, f := range flist.List {
525	x.decl = nil // no snippets for fields
526	for _, name := range f.Names {
527	x.visitIdent(kind, name)
528	}
529	ast.Walk(x, f.Type)
530	// ignore tag - not indexed at the moment
531	}
532	}
533
534	func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) {
535	switch n := spec.(type) {
536	case *ast.ImportSpec:
537	x.visitIdent(ImportDecl, n.Name)
538	if n.Path != nil {
539	if imp, err := strconv.Unquote(n.Path.Value); err == nil {
540	x.importCount[x.intern(imp)]++
541	}
542	}
543
544	case *ast.ValueSpec:
545	for _, n := range n.Names {
546	x.visitIdent(kind, n)
547	}
548	ast.Walk(x, n.Type)
549	for _, v := range n.Values {
550	ast.Walk(x, v)
551	}
552
553	case *ast.TypeSpec:
554	x.visitIdent(TypeDecl, n.Name)
555	ast.Walk(x, n.Type)
556	}
557	}
558
559	func (x Indexer) visitGenDecl(decl ast.GenDecl) {
560	kind := VarDecl
561	if decl.Tok == token.CONST {
562	kind = ConstDecl
563	}
564	x.decl = decl
565	for _, s := range decl.Specs {
566	x.visitSpec(kind, s)
567	}
568	}
569
570	func (x *Indexer) Visit(node ast.Node) ast.Visitor {
571	switch n := node.(type) {
572	case nil:
573	// nothing to do
574
575	case *ast.Ident:
576	x.visitIdent(Use, n)
577
578	case *ast.FieldList:
579	x.visitFieldList(VarDecl, n)
580
581	case *ast.InterfaceType:
582	x.visitFieldList(MethodDecl, n.Methods)
583
584	case *ast.DeclStmt:
585	// local declarations should only be *ast.GenDecls;
586	// ignore incorrect ASTs
587	if decl, ok := n.Decl.(*ast.GenDecl); ok {
588	x.decl = nil // no snippets for local declarations
589	x.visitGenDecl(decl)
590	}
591
592	case *ast.GenDecl:
593	x.decl = n
594	x.visitGenDecl(n)
595
596	case *ast.FuncDecl:
597	kind := FuncDecl
598	if n.Recv != nil {
599	kind = MethodDecl
600	ast.Walk(x, n.Recv)
601	}
602	x.decl = n
603	x.visitIdent(kind, n.Name)
604	ast.Walk(x, n.Type)
605	if n.Body != nil {
606	ast.Walk(x, n.Body)
607	}
608
609	case *ast.File:
610	x.decl = nil
611	x.visitIdent(PackageClause, n.Name)
612	for _, d := range n.Decls {
613	ast.Walk(x, d)
614	}
615
616	default:
617	return x
618	}
619
620	return nil
621	}
622
623	// addFile adds a file to the index if possible and returns the file set file
624	// and the file's AST if it was successfully parsed as a Go file. If addFile
625	// failed (that is, if the file was not added), it returns file == nil.
626	func (x Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file token.File, ast *ast.File) {
627	defer f.Close()
628
629	// The file set's base offset and x.sources size must be in lock-step;
630	// this permits the direct mapping of suffix array lookup results to
631	// to corresponding Pos values.
632	//
633	// When a file is added to the file set, its offset base increases by
634	// the size of the file + 1; and the initial base offset is 1. Add an
635	// extra byte to the sources here.
636	x.sources.WriteByte(0)
637
638	// If the sources length doesn't match the file set base at this point
639	// the file set implementation changed or we have another error.
640	base := x.fset.Base()
641	if x.sources.Len() != base {
642	panic("internal error: file base incorrect")
643	}
644
645	// append file contents (src) to x.sources
646	if _, err := x.sources.ReadFrom(f); err == nil {
647	src := x.sources.Bytes()[base:]
648
649	if goFile {
650	// parse the file and in the process add it to the file set
651	if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil {
652	file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file
653	return
654	}
655	// file has parse errors, and the AST may be incorrect -
656	// set lines information explicitly and index as ordinary
657	// text file (cannot fall through to the text case below
658	// because the file has already been added to the file set
659	// by the parser)
660	file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file
661	file.SetLinesForContent(src)
662	ast = nil
663	return
664	}
665
666	if util.IsText(src) {
667	// only add the file to the file set (for the full text index)
668	file = x.fset.AddFile(filename, x.fset.Base(), len(src))
669	file.SetLinesForContent(src)
670	return
671	}
672	}
673
674	// discard possibly added data
675	x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added
676	return
677	}
678
679	// Design note: Using an explicit white list of permitted files for indexing
680	// makes sure that the important files are included and massively reduces the
681	// number of files to index. The advantage over a blacklist is that unexpected
682	// (non-blacklisted) files won't suddenly explode the index.
683
684	// Files are whitelisted if they have a file name or extension
685	// present as key in whitelisted.
686	var whitelisted = map[string]bool{
687	".bash": true,
688	".c": true,
689	".cc": true,
690	".cpp": true,
691	".cxx": true,
692	".css": true,
693	".go": true,
694	".goc": true,
695	".h": true,
696	".hh": true,
697	".hpp": true,
698	".hxx": true,
699	".html": true,
700	".js": true,
701	".out": true,
702	".py": true,
703	".s": true,
704	".sh": true,
705	".txt": true,
706	".xml": true,
707	"AUTHORS": true,
708	"CONTRIBUTORS": true,
709	"LICENSE": true,
710	"Makefile": true,
711	"PATENTS": true,
712	"README": true,
713	}
714
715	// isWhitelisted returns true if a file is on the list
716	// of "permitted" files for indexing. The filename must
717	// be the directory-local name of the file.
718	func isWhitelisted(filename string) bool {
719	key := pathpkg.Ext(filename)
720	if key == "" {
721	// file has no extension - use entire filename
722	key = filename
723	}
724	return whitelisted[key]
725	}
726
727	func (x Indexer) indexDocs(dirname string, filename string, astFile ast.File) {
728	pkgName := x.intern(astFile.Name.Name)
729	if pkgName == "main" {
730	return
731	}
732	pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
733	astPkg := ast.Package{
734	Name: pkgName,
735	Files: map[string]*ast.File{
736	filename: astFile,
737	},
738	}
739	var m doc.Mode
740	docPkg := doc.New(&astPkg, dirname, m)
741	addIdent := func(sk SpotKind, name string, docstr string) {
742	if x.idents[sk] == nil {
743	x.idents[sk] = make(map[string][]Ident)
744	}
745	name = x.intern(name)
746	x.idents[sk][name] = append(x.idents[sk][name], Ident{
747	Path: pkgPath,
748	Package: pkgName,
749	Name: name,
750	Doc: doc.Synopsis(docstr),
751	})
752	}
753
754	if x.idents[PackageClause] == nil {
755	x.idents[PackageClause] = make(map[string][]Ident)
756	}
757	// List of words under which the package identifier will be stored.
758	// This includes the package name and the components of the directory
759	// in which it resides.
760	words := strings.Split(pathpkg.Dir(pkgPath), "/")
761	if words[0] == "." {
762	words = []string{}
763	}
764	name := x.intern(docPkg.Name)
765	synopsis := doc.Synopsis(docPkg.Doc)
766	words = append(words, name)
767	pkgIdent := Ident{
768	Path: pkgPath,
769	Package: pkgName,
770	Name: name,
771	Doc: synopsis,
772	}
773	for _, word := range words {
774	word = x.intern(word)
775	found := false
776	pkgs := x.idents[PackageClause][word]
777	for i, p := range pkgs {
778	if p.Path == pkgPath {
779	if docPkg.Doc != "" {
780	p.Doc = synopsis
781	pkgs[i] = p
782	}
783	found = true
784	break
785	}
786	}
787	if !found {
788	x.idents[PackageClause][word] = append(x.idents[PackageClause][word], pkgIdent)
789	}
790	}
791
792	for _, c := range docPkg.Consts {
793	for _, name := range c.Names {
794	addIdent(ConstDecl, name, c.Doc)
795	}
796	}
797	for _, t := range docPkg.Types {
798	addIdent(TypeDecl, t.Name, t.Doc)
799	for _, c := range t.Consts {
800	for _, name := range c.Names {
801	addIdent(ConstDecl, name, c.Doc)
802	}
803	}
804	for _, v := range t.Vars {
805	for _, name := range v.Names {
806	addIdent(VarDecl, name, v.Doc)
807	}
808	}
809	for _, f := range t.Funcs {
810	addIdent(FuncDecl, f.Name, f.Doc)
811	}
812	for _, f := range t.Methods {
813	addIdent(MethodDecl, f.Name, f.Doc)
814	// Change the name of methods to be "<typename>.<methodname>".
815	// They will still be indexed as <methodname>.
816	idents := x.idents[MethodDecl][f.Name]
817	idents[len(idents)-1].Name = x.intern(t.Name + "." + f.Name)
818	}
819	}
820	for _, v := range docPkg.Vars {
821	for _, name := range v.Names {
822	addIdent(VarDecl, name, v.Doc)
823	}
824	}
825	for _, f := range docPkg.Funcs {
826	addIdent(FuncDecl, f.Name, f.Doc)
827	}
828	}
829
830	func (x Indexer) indexGoFile(dirname string, filename string, file token.File, astFile *ast.File) {
831	pkgName := astFile.Name.Name
832
833	if x.c.IndexGoCode {
834	x.current = file
835	pak := x.lookupPackage(dirname, pkgName)
836	x.file = &File{filename, pak}
837	ast.Walk(x, astFile)
838	}
839
840	if x.c.IndexDocs {
841	// Test files are already filtered out in visitFile if IndexGoCode and
842	// IndexFullText are false. Otherwise, check here.
843	isTestFile := (x.c.IndexGoCode \|\| x.c.IndexFullText) &&
844	(strings.HasSuffix(filename, "_test.go") \|\| strings.HasPrefix(dirname, "/test/"))
845	if !isTestFile {
846	x.indexDocs(dirname, filename, astFile)
847	}
848	}
849
850	ppKey := x.intern(pkgName)
851	if _, ok := x.packagePath[ppKey]; !ok {
852	x.packagePath[ppKey] = make(map[string]bool)
853	}
854	pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
855	x.packagePath[ppKey][pkgPath] = true
856
857	// Merge in exported symbols found walking this file into
858	// the map for that package.
859	if len(x.curPkgExports) > 0 {
860	dest, ok := x.exports[pkgPath]
861	if !ok {
862	dest = make(map[string]SpotKind)
863	x.exports[pkgPath] = dest
864	}
865	for k, v := range x.curPkgExports {
866	dest[k] = v
867	}
868	}
869	}
870
871	func (x *Indexer) visitFile(dirname string, fi os.FileInfo) {
872	if fi.IsDir() \|\| !x.c.IndexEnabled {
873	return
874	}
875
876	filename := pathpkg.Join(dirname, fi.Name())
877	goFile := isGoFile(fi)
878
879	switch {
880	case x.c.IndexFullText:
881	if !isWhitelisted(fi.Name()) {
882	return
883	}
884	case x.c.IndexGoCode:
885	if !goFile {
886	return
887	}
888	case x.c.IndexDocs:
889	if !goFile \|\|
890	strings.HasSuffix(fi.Name(), "_test.go") \|\|
891	strings.HasPrefix(dirname, "/test/") {
892	return
893	}
894	default:
895	// No indexing turned on.
896	return
897	}
898
899	x.fsOpenGate <- true
900	defer func() { <-x.fsOpenGate }()
901
902	// open file
903	f, err := x.c.fs.Open(filename)
904	if err != nil {
905	return
906	}
907
908	x.mu.Lock()
909	defer x.mu.Unlock()
910
911	x.throttle.Throttle()
912
913	x.curPkgExports = make(map[string]SpotKind)
914	file, fast := x.addFile(f, filename, goFile)
915	if file == nil {
916	return // addFile failed
917	}
918
919	if fast != nil {
920	x.indexGoFile(dirname, fi.Name(), file, fast)
921	}
922
923	// update statistics
924	x.stats.Bytes += file.Size()
925	x.stats.Files++
926	x.stats.Lines += file.LineCount()
927	}
928
929	// indexOptions contains information that affects the contents of an index.
930	type indexOptions struct {
931	// Docs provides documentation search results.
932	// It is only consulted if IndexEnabled is true.
933	// The default values is true.
934	Docs bool
935
936	// GoCode provides Go source code search results.
937	// It is only consulted if IndexEnabled is true.
938	// The default values is true.
939	GoCode bool
940
941	// FullText provides search results from all files.
942	// It is only consulted if IndexEnabled is true.
943	// The default values is true.
944	FullText bool
945
946	// MaxResults optionally specifies the maximum results for indexing.
947	// The default is 1000.
948	MaxResults int
949	}
950
951	// ----------------------------------------------------------------------------
952	// Index
953
954	type LookupResult struct {
955	Decls HitList // package-level declarations (with snippets)
956	Others HitList // all other occurrences
957	}
958
959	type Index struct {
960	fset *token.FileSet // file set used during indexing; nil if no textindex
961	suffixes *suffixarray.Index // suffixes for concatenated sources; nil if no textindex
962	words map[string]*LookupResult // maps words to hit lists
963	alts map[string]*AltWords // maps canonical(words) to lists of alternative spellings
964	snippets []*Snippet // all snippets, indexed by snippet index
965	stats Statistics
966	importCount map[string]int // package path ("net/http") => count
967	packagePath map[string]map[string]bool // "template" => "text/template" => true
968	exports map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
969	idents map[SpotKind]map[string][]Ident
970	opts indexOptions
971	}
972
973	func canonical(w string) string { return strings.ToLower(w) }
974
975	// Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems
976	// consuming file descriptors, where some systems have low 256 or 512 limits.
977	// Go should have a built-in way to cap fd usage under the ulimit.
978	const (
979	maxOpenFiles = 200
980	maxOpenDirs = 50
981	)
982
983	func (c *Corpus) throttle() float64 {
984	if c.IndexThrottle <= 0 {
985	return 0.9
986	}
987	if c.IndexThrottle > 1.0 {
988	return 1.0
989	}
990	return c.IndexThrottle
991	}
992
993	// NewIndex creates a new index for the .go files provided by the corpus.
994	func (c Corpus) NewIndex() Index {
995	// initialize Indexer
996	// (use some reasonably sized maps to start)
997	x := &Indexer{
998	c: c,
999	fset: token.NewFileSet(),
1000	fsOpenGate: make(chan bool, maxOpenFiles),
1001	strings: make(map[string]string),
1002	packages: make(map[Pak]*Pak, 256),
1003	words: make(map[string]*IndexResult, 8192),
1004	throttle: util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time
1005	importCount: make(map[string]int),
1006	packagePath: make(map[string]map[string]bool),
1007	exports: make(map[string]map[string]SpotKind),
1008	idents: make(map[SpotKind]map[string][]Ident, 4),
1009	}
1010
1011	// index all files in the directories given by dirnames
1012	var wg sync.WaitGroup // outstanding ReadDir + visitFile
1013	dirGate := make(chan bool, maxOpenDirs)
1014	for dirname := range c.fsDirnames() {
1015	if c.IndexDirectory != nil && !c.IndexDirectory(dirname) {
1016	continue
1017	}
1018	dirGate <- true
1019	wg.Add(1)
1020	go func(dirname string) {
1021	defer func() { <-dirGate }()
1022	defer wg.Done()
1023
1024	list, err := c.fs.ReadDir(dirname)
1025	if err != nil {
1026	log.Printf("ReadDir(%q): %v; skipping directory", dirname, err)
1027	return // ignore this directory
1028	}
1029	for _, fi := range list {
1030	wg.Add(1)
1031	go func(fi os.FileInfo) {
1032	defer wg.Done()
1033	x.visitFile(dirname, fi)
1034	}(fi)
1035	}
1036	}(dirname)
1037	}
1038	wg.Wait()
1039
1040	if !c.IndexFullText {
1041	// the file set, the current file, and the sources are
1042	// not needed after indexing if no text index is built -
1043	// help GC and clear them
1044	x.fset = nil
1045	x.sources.Reset()
1046	x.current = nil // contains reference to fset!
1047	}
1048
1049	// for each word, reduce the RunLists into a LookupResult;
1050	// also collect the word with its canonical spelling in a
1051	// word list for later computation of alternative spellings
1052	words := make(map[string]*LookupResult)
1053	var wlist RunList
1054	for w, h := range x.words {
1055	decls := reduce(h.Decls)
1056	others := reduce(h.Others)
1057	words[w] = &LookupResult{
1058	Decls: decls,
1059	Others: others,
1060	}
1061	wlist = append(wlist, &wordPair{canonical(w), w})
1062	x.throttle.Throttle()
1063	}
1064	x.stats.Words = len(words)
1065
1066	// reduce the word list {canonical(w), w} into
1067	// a list of AltWords runs {canonical(w), {w}}
1068	alist := wlist.reduce(lessWordPair, newAltWords)
1069
1070	// convert alist into a map of alternative spellings
1071	alts := make(map[string]*AltWords)
1072	for i := 0; i < len(alist); i++ {
1073	a := alist[i].(*AltWords)
1074	alts[a.Canon] = a
1075	}
1076
1077	// create text index
1078	var suffixes *suffixarray.Index
1079	if c.IndexFullText {
1080	suffixes = suffixarray.New(x.sources.Bytes())
1081	}
1082
1083	// sort idents by the number of imports of their respective packages
1084	for _, idMap := range x.idents {
1085	for _, ir := range idMap {
1086	sort.Sort(byImportCount{ir, x.importCount})
1087	}
1088	}
1089
1090	return &Index{
1091	fset: x.fset,
1092	suffixes: suffixes,
1093	words: words,
1094	alts: alts,
1095	snippets: x.snippets,
1096	stats: x.stats,
1097	importCount: x.importCount,
1098	packagePath: x.packagePath,
1099	exports: x.exports,
1100	idents: x.idents,
1101	opts: indexOptions{
1102	Docs: x.c.IndexDocs,
1103	GoCode: x.c.IndexGoCode,
1104	FullText: x.c.IndexFullText,
1105	MaxResults: x.c.MaxResults,
1106	},
1107	}
1108	}
1109
1110	var ErrFileIndexVersion = errors.New("file index version out of date")
1111
1112	const fileIndexVersion = 3
1113
1114	// fileIndex is the subset of Index that's gob-encoded for use by
1115	// Index.Write and Index.Read.
1116	type fileIndex struct {
1117	Version int
1118	Words map[string]*LookupResult
1119	Alts map[string]*AltWords
1120	Snippets []*Snippet
1121	Fulltext bool
1122	Stats Statistics
1123	ImportCount map[string]int
1124	PackagePath map[string]map[string]bool
1125	Exports map[string]map[string]SpotKind
1126	Idents map[SpotKind]map[string][]Ident
1127	Opts indexOptions
1128	}
1129
1130	func (x *fileIndex) Write(w io.Writer) error {
1131	return gob.NewEncoder(w).Encode(x)
1132	}
1133
1134	func (x *fileIndex) Read(r io.Reader) error {
1135	return gob.NewDecoder(r).Decode(x)
1136	}
1137
1138	// WriteTo writes the index x to w.
1139	func (x *Index) WriteTo(w io.Writer) (n int64, err error) {
1140	w = countingWriter{&n, w}
1141	fulltext := false
1142	if x.suffixes != nil {
1143	fulltext = true
1144	}
1145	fx := fileIndex{
1146	Version: fileIndexVersion,
1147	Words: x.words,
1148	Alts: x.alts,
1149	Snippets: x.snippets,
1150	Fulltext: fulltext,
1151	Stats: x.stats,
1152	ImportCount: x.importCount,
1153	PackagePath: x.packagePath,
1154	Exports: x.exports,
1155	Idents: x.idents,
1156	Opts: x.opts,
1157	}
1158	if err := fx.Write(w); err != nil {
1159	return 0, err
1160	}
1161	if fulltext {
1162	encode := func(x interface{}) error {
1163	return gob.NewEncoder(w).Encode(x)
1164	}
1165	if err := x.fset.Write(encode); err != nil {
1166	return 0, err
1167	}
1168	if err := x.suffixes.Write(w); err != nil {
1169	return 0, err
1170	}
1171	}
1172	return n, nil
1173	}
1174
1175	// ReadFrom reads the index from r into x; x must not be nil.
1176	// If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader.
1177	// If the index is from an old version, the error is ErrFileIndexVersion.
1178	func (x *Index) ReadFrom(r io.Reader) (n int64, err error) {
1179	// We use the ability to read bytes as a plausible surrogate for buffering.
1180	if _, ok := r.(io.ByteReader); !ok {
1181	r = bufio.NewReader(r)
1182	}
1183	r = countingReader{&n, r.(byteReader)}
1184	var fx fileIndex
1185	if err := fx.Read(r); err != nil {
1186	return n, err
1187	}
1188	if fx.Version != fileIndexVersion {
1189	return 0, ErrFileIndexVersion
1190	}
1191	x.words = fx.Words
1192	x.alts = fx.Alts
1193	x.snippets = fx.Snippets
1194	x.stats = fx.Stats
1195	x.importCount = fx.ImportCount
1196	x.packagePath = fx.PackagePath
1197	x.exports = fx.Exports
1198	x.idents = fx.Idents
1199	x.opts = fx.Opts
1200	if fx.Fulltext {
1201	x.fset = token.NewFileSet()
1202	decode := func(x interface{}) error {
1203	return gob.NewDecoder(r).Decode(x)
1204	}
1205	if err := x.fset.Read(decode); err != nil {
1206	return n, err
1207	}
1208	x.suffixes = new(suffixarray.Index)
1209	if err := x.suffixes.Read(r); err != nil {
1210	return n, err
1211	}
1212	}
1213	return n, nil
1214	}
1215
1216	// Stats returns index statistics.
1217	func (x *Index) Stats() Statistics {
1218	return x.stats
1219	}
1220
1221	// ImportCount returns a map from import paths to how many times they were seen.
1222	func (x *Index) ImportCount() map[string]int {
1223	return x.importCount
1224	}
1225
1226	// PackagePath returns a map from short package name to a set
1227	// of full package path names that use that short package name.
1228	func (x *Index) PackagePath() map[string]map[string]bool {
1229	return x.packagePath
1230	}
1231
1232	// Exports returns a map from full package path to exported
1233	// symbol name to its type.
1234	func (x *Index) Exports() map[string]map[string]SpotKind {
1235	return x.exports
1236	}
1237
1238	// Idents returns a map from identifier type to exported
1239	// symbol name to the list of identifiers matching that name.
1240	func (x *Index) Idents() map[SpotKind]map[string][]Ident {
1241	return x.idents
1242	}
1243
1244	func (x Index) lookupWord(w string) (match LookupResult, alt *AltWords) {
1245	match = x.words[w]
1246	alt = x.alts[canonical(w)]
1247	// remove current spelling from alternatives
1248	// (if there is no match, the alternatives do
1249	// not contain the current spelling)
1250	if match != nil && alt != nil {
1251	alt = alt.filter(w)
1252	}
1253	return
1254	}
1255
1256	// isIdentifier reports whether s is a Go identifier.
1257	func isIdentifier(s string) bool {
1258	for i, ch := range s {
1259	if unicode.IsLetter(ch) \|\| ch == '_' \|\| i > 0 && unicode.IsDigit(ch) {
1260	continue
1261	}
1262	return false
1263	}
1264	return len(s) > 0
1265	}
1266
1267	// For a given query, which is either a single identifier or a qualified
1268	// identifier, Lookup returns a SearchResult containing packages, a LookupResult, a
1269	// list of alternative spellings, and identifiers, if any. Any and all results
1270	// may be nil. If the query syntax is wrong, an error is reported.
1271	func (x Index) Lookup(query string) (SearchResult, error) {
1272	ss := strings.Split(query, ".")
1273
1274	// check query syntax
1275	for _, s := range ss {
1276	if !isIdentifier(s) {
1277	return nil, errors.New("all query parts must be identifiers")
1278	}
1279	}
1280	rslt := &SearchResult{
1281	Query: query,
1282	Idents: make(map[SpotKind][]Ident, 5),
1283	}
1284	// handle simple and qualified identifiers
1285	switch len(ss) {
1286	case 1:
1287	ident := ss[0]
1288	rslt.Hit, rslt.Alt = x.lookupWord(ident)
1289	if rslt.Hit != nil {
1290	// found a match - filter packages with same name
1291	// for the list of packages called ident, if any
1292	rslt.Pak = rslt.Hit.Others.filter(ident)
1293	}
1294	for k, v := range x.idents {
1295	const rsltLimit = 50
1296	ids := byImportCount{v[ident], x.importCount}
1297	rslt.Idents[k] = ids.top(rsltLimit)
1298	}
1299
1300	case 2:
1301	pakname, ident := ss[0], ss[1]
1302	rslt.Hit, rslt.Alt = x.lookupWord(ident)
1303	if rslt.Hit != nil {
1304	// found a match - filter by package name
1305	// (no paks - package names are not qualified)
1306	decls := rslt.Hit.Decls.filter(pakname)
1307	others := rslt.Hit.Others.filter(pakname)
1308	rslt.Hit = &LookupResult{decls, others}
1309	}
1310	for k, v := range x.idents {
1311	ids := byImportCount{v[ident], x.importCount}
1312	rslt.Idents[k] = ids.filter(pakname)
1313	}
1314
1315	default:
1316	return nil, errors.New("query is not a (qualified) identifier")
1317	}
1318
1319	return rslt, nil
1320	}
1321
1322	func (x Index) Snippet(i int) Snippet {
1323	// handle illegal snippet indices gracefully
1324	if 0 <= i && i < len(x.snippets) {
1325	return x.snippets[i]
1326	}
1327	return nil
1328	}
1329
1330	type positionList []struct {
1331	filename string
1332	line int
1333	}
1334
1335	func (list positionList) Len() int { return len(list) }
1336	func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename }
1337	func (list positionList) Swap(i, j int) { list[i], list[j] = list[j], list[i] }
1338
1339	// unique returns the list sorted and with duplicate entries removed
1340	func unique(list []int) []int {
1341	sort.Ints(list)
1342	var last int
1343	i := 0
1344	for _, x := range list {
1345	if i == 0 \|\| x != last {
1346	last = x
1347	list[i] = x
1348	i++
1349	}
1350	}
1351	return list[0:i]
1352	}
1353
1354	// A FileLines value specifies a file and line numbers within that file.
1355	type FileLines struct {
1356	Filename string
1357	Lines []int
1358	}
1359
1360	// LookupRegexp returns the number of matches and the matches where a regular
1361	// expression r is found in the full text index. At most n matches are
1362	// returned (thus found <= n).
1363	func (x Index) LookupRegexp(r regexp.Regexp, n int) (found int, result []FileLines) {
1364	if x.suffixes == nil \|\| n <= 0 {
1365	return
1366	}
1367	// n > 0
1368
1369	var list positionList
1370	// FindAllIndex may returns matches that span across file boundaries.
1371	// Such matches are unlikely, buf after eliminating them we may end up
1372	// with fewer than n matches. If we don't have enough at the end, redo
1373	// the search with an increased value n1, but only if FindAllIndex
1374	// returned all the requested matches in the first place (if it
1375	// returned fewer than that there cannot be more).
1376	for n1 := n; found < n; n1 += n - found {
1377	found = 0
1378	matches := x.suffixes.FindAllIndex(r, n1)
1379	// compute files, exclude matches that span file boundaries,
1380	// and map offsets to file-local offsets
1381	list = make(positionList, len(matches))
1382	for _, m := range matches {
1383	// by construction, an offset corresponds to the Pos value
1384	// for the file set - use it to get the file and line
1385	p := token.Pos(m[0])
1386	if file := x.fset.File(p); file != nil {
1387	if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
1388	// match [m[0], m[1]) is within the file boundaries
1389	list[found].filename = file.Name()
1390	list[found].line = file.Line(p)
1391	found++
1392	}
1393	}
1394	}
1395	if found == n \|\| len(matches) < n1 {
1396	// found all matches or there's no chance to find more
1397	break
1398	}
1399	}
1400	list = list[0:found]
1401	sort.Sort(list) // sort by filename
1402
1403	// collect matches belonging to the same file
1404	var last string
1405	var lines []int
1406	addLines := func() {
1407	if len(lines) > 0 {
1408	// remove duplicate lines
1409	result = append(result, FileLines{last, unique(lines)})
1410	lines = nil
1411	}
1412	}
1413	for _, m := range list {
1414	if m.filename != last {
1415	addLines()
1416	last = m.filename
1417	}
1418	lines = append(lines, m.line)
1419	}
1420	addLines()
1421
1422	return
1423	}
1424
1425	// InvalidateIndex should be called whenever any of the file systems
1426	// under godoc's observation change so that the indexer is kicked on.
1427	func (c *Corpus) invalidateIndex() {
1428	c.fsModified.Set(nil)
1429	c.refreshMetadata()
1430	}
1431
1432	// feedDirnames feeds the directory names of all directories
1433	// under the file system given by root to channel c.
1434	func (c *Corpus) feedDirnames(ch chan<- string) {
1435	if dir, _ := c.fsTree.Get(); dir != nil {
1436	for d := range dir.(*Directory).iter(false) {
1437	ch <- d.Path
1438	}
1439	}
1440	}
1441
1442	// fsDirnames() returns a channel sending all directory names
1443	// of all the file systems under godoc's observation.
1444	func (c *Corpus) fsDirnames() <-chan string {
1445	ch := make(chan string, 256) // buffered for fewer context switches
1446	go func() {
1447	c.feedDirnames(ch)
1448	close(ch)
1449	}()
1450	return ch
1451	}
1452
1453	// CompatibleWith reports whether the Index x is compatible with the corpus
1454	// indexing options set in c.
1455	func (x Index) CompatibleWith(c Corpus) bool {
1456	return x.opts.Docs == c.IndexDocs &&
1457	x.opts.GoCode == c.IndexGoCode &&
1458	x.opts.FullText == c.IndexFullText &&
1459	x.opts.MaxResults == c.MaxResults
1460	}
1461
1462	func (c *Corpus) readIndex(filenames string) error {
1463	matches, err := filepath.Glob(filenames)
1464	if err != nil {
1465	return err
1466	} else if matches == nil {
1467	return fmt.Errorf("no index files match %q", filenames)
1468	}
1469	sort.Strings(matches) // make sure files are in the right order
1470	files := make([]io.Reader, 0, len(matches))
1471	for _, filename := range matches {
1472	f, err := os.Open(filename)
1473	if err != nil {
1474	return err
1475	}
1476	defer f.Close()
1477	files = append(files, f)
1478	}
1479	return c.ReadIndexFrom(io.MultiReader(files...))
1480	}
1481
1482	// ReadIndexFrom sets the current index from the serialized version found in r.
1483	func (c *Corpus) ReadIndexFrom(r io.Reader) error {
1484	x := new(Index)
1485	if _, err := x.ReadFrom(r); err != nil {
1486	return err
1487	}
1488	if !x.CompatibleWith(c) {
1489	return fmt.Errorf("index file options are incompatible: %v", x.opts)
1490	}
1491	c.searchIndex.Set(x)
1492	return nil
1493	}
1494
1495	func (c *Corpus) UpdateIndex() {
1496	if c.Verbose {
1497	log.Printf("updating index...")
1498	}
1499	start := time.Now()
1500	index := c.NewIndex()
1501	stop := time.Now()
1502	c.searchIndex.Set(index)
1503	if c.Verbose {
1504	secs := stop.Sub(start).Seconds()
1505	stats := index.Stats()
1506	log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
1507	secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
1508	}
1509	memstats := new(runtime.MemStats)
1510	runtime.ReadMemStats(memstats)
1511	if c.Verbose {
1512	log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
1513	}
1514	runtime.GC()
1515	runtime.ReadMemStats(memstats)
1516	if c.Verbose {
1517	log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
1518	}
1519	}
1520
1521	// RunIndexer runs forever, indexing.
1522	func (c *Corpus) RunIndexer() {
1523	// initialize the index from disk if possible
1524	if c.IndexFiles != "" {
1525	c.initFSTree()
1526	if err := c.readIndex(c.IndexFiles); err != nil {
1527	log.Printf("error reading index from file %s: %v", c.IndexFiles, err)
1528	}
1529	return
1530	}
1531
1532	// Repeatedly update the package directory tree and index.
1533	for {
1534	c.initFSTree()
1535	c.UpdateIndex()
1536	if c.IndexInterval < 0 {
1537	return
1538	}
1539	delay := 5 * time.Minute // by default, reindex every 5 minutes
1540	if c.IndexInterval > 0 {
1541	delay = c.IndexInterval
1542	}
1543	time.Sleep(delay)
1544	}
1545	}
1546
1547	type countingWriter struct {
1548	n *int64
1549	w io.Writer
1550	}
1551
1552	func (c countingWriter) Write(p []byte) (n int, err error) {
1553	n, err = c.w.Write(p)
1554	*c.n += int64(n)
1555	return
1556	}
1557
1558	type byteReader interface {
1559	io.Reader
1560	io.ByteReader
1561	}
1562
1563	type countingReader struct {
1564	n *int64
1565	r byteReader
1566	}
1567
1568	func (c countingReader) Read(p []byte) (n int, err error) {
1569	n, err = c.r.Read(p)
1570	*c.n += int64(n)
1571	return
1572	}
1573
1574	func (c countingReader) ReadByte() (b byte, err error) {
1575	b, err = c.r.ReadByte()
1576	*c.n += 1
1577	return
1578	}
1579