1 | // Copyright 2009 The Go Authors. All rights reserved. |
---|---|
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | // This file contains the infrastructure to create an |
6 | // identifier and full-text index for a set of Go files. |
7 | // |
8 | // Algorithm for identifier index: |
9 | // - traverse all .go files of the file tree specified by root |
10 | // - for each identifier (word) encountered, collect all occurrences (spots) |
11 | // into a list; this produces a list of spots for each word |
12 | // - reduce the lists: from a list of spots to a list of FileRuns, |
13 | // and from a list of FileRuns into a list of PakRuns |
14 | // - make a HitList from the PakRuns |
15 | // |
16 | // Details: |
17 | // - keep two lists per word: one containing package-level declarations |
18 | // that have snippets, and one containing all other spots |
19 | // - keep the snippets in a separate table indexed by snippet index |
20 | // and store the snippet index in place of the line number in a SpotInfo |
21 | // (the line number for spots with snippets is stored in the snippet) |
22 | // - at the end, create lists of alternative spellings for a given |
23 | // word |
24 | // |
25 | // Algorithm for full text index: |
26 | // - concatenate all source code in a byte buffer (in memory) |
27 | // - add the files to a file set in lockstep as they are added to the byte |
28 | // buffer such that a byte buffer offset corresponds to the Pos value for |
29 | // that file location |
30 | // - create a suffix array from the concatenated sources |
31 | // |
32 | // String lookup in full text index: |
33 | // - use the suffix array to lookup a string's offsets - the offsets |
34 | // correspond to the Pos values relative to the file set |
35 | // - translate the Pos values back into file and line information and |
36 | // sort the result |
37 | |
38 | package godoc |
39 | |
40 | import ( |
41 | "bufio" |
42 | "bytes" |
43 | "encoding/gob" |
44 | "errors" |
45 | "fmt" |
46 | "go/ast" |
47 | "go/doc" |
48 | "go/parser" |
49 | "go/token" |
50 | "index/suffixarray" |
51 | "io" |
52 | "log" |
53 | "math" |
54 | "os" |
55 | pathpkg "path" |
56 | "path/filepath" |
57 | "regexp" |
58 | "runtime" |
59 | "sort" |
60 | "strconv" |
61 | "strings" |
62 | "sync" |
63 | "time" |
64 | "unicode" |
65 | |
66 | "golang.org/x/tools/godoc/util" |
67 | "golang.org/x/tools/godoc/vfs" |
68 | ) |
69 | |
70 | // ---------------------------------------------------------------------------- |
71 | // InterfaceSlice is a helper type for sorting interface |
72 | // slices according to some slice-specific sort criteria. |
73 | |
74 | type comparer func(x, y interface{}) bool |
75 | |
76 | type interfaceSlice struct { |
77 | slice []interface{} |
78 | less comparer |
79 | } |
80 | |
81 | // ---------------------------------------------------------------------------- |
82 | // RunList |
83 | |
84 | // A RunList is a list of entries that can be sorted according to some |
85 | // criteria. A RunList may be compressed by grouping "runs" of entries |
86 | // which are equal (according to the sort criteria) into a new RunList of |
87 | // runs. For instance, a RunList containing pairs (x, y) may be compressed |
88 | // into a RunList containing pair runs (x, {y}) where each run consists of |
89 | // a list of y's with the same x. |
90 | type RunList []interface{} |
91 | |
92 | func (h RunList) sort(less comparer) { |
93 | sort.Sort(&interfaceSlice{h, less}) |
94 | } |
95 | |
96 | func (p *interfaceSlice) Len() int { return len(p.slice) } |
97 | func (p *interfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) } |
98 | func (p *interfaceSlice) Swap(i, j int) { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] } |
99 | |
100 | // Compress entries which are the same according to a sort criteria |
101 | // (specified by less) into "runs". |
102 | func (h RunList) reduce(less comparer, newRun func(h RunList) interface{}) RunList { |
103 | if len(h) == 0 { |
104 | return nil |
105 | } |
106 | // len(h) > 0 |
107 | |
108 | // create runs of entries with equal values |
109 | h.sort(less) |
110 | |
111 | // for each run, make a new run object and collect them in a new RunList |
112 | var hh RunList |
113 | i, x := 0, h[0] |
114 | for j, y := range h { |
115 | if less(x, y) { |
116 | hh = append(hh, newRun(h[i:j])) |
117 | i, x = j, h[j] // start a new run |
118 | } |
119 | } |
120 | // add final run, if any |
121 | if i < len(h) { |
122 | hh = append(hh, newRun(h[i:])) |
123 | } |
124 | |
125 | return hh |
126 | } |
127 | |
128 | // ---------------------------------------------------------------------------- |
129 | // KindRun |
130 | |
131 | // Debugging support. Disable to see multiple entries per line. |
132 | const removeDuplicates = true |
133 | |
134 | // A KindRun is a run of SpotInfos of the same kind in a given file. |
135 | // The kind (3 bits) is stored in each SpotInfo element; to find the |
136 | // kind of a KindRun, look at any of its elements. |
137 | type KindRun []SpotInfo |
138 | |
139 | // KindRuns are sorted by line number or index. Since the isIndex bit |
140 | // is always the same for all infos in one list we can compare lori's. |
141 | func (k KindRun) Len() int { return len(k) } |
142 | func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() } |
143 | func (k KindRun) Swap(i, j int) { k[i], k[j] = k[j], k[i] } |
144 | |
145 | // FileRun contents are sorted by Kind for the reduction into KindRuns. |
146 | func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() } |
147 | |
148 | // newKindRun allocates a new KindRun from the SpotInfo run h. |
149 | func newKindRun(h RunList) interface{} { |
150 | run := make(KindRun, len(h)) |
151 | for i, x := range h { |
152 | run[i] = x.(SpotInfo) |
153 | } |
154 | |
155 | // Spots were sorted by file and kind to create this run. |
156 | // Within this run, sort them by line number or index. |
157 | sort.Sort(run) |
158 | |
159 | if removeDuplicates { |
160 | // Since both the lori and kind field must be |
161 | // same for duplicates, and since the isIndex |
162 | // bit is always the same for all infos in one |
163 | // list we can simply compare the entire info. |
164 | k := 0 |
165 | prev := SpotInfo(math.MaxUint32) // an unlikely value |
166 | for _, x := range run { |
167 | if x != prev { |
168 | run[k] = x |
169 | k++ |
170 | prev = x |
171 | } |
172 | } |
173 | run = run[0:k] |
174 | } |
175 | |
176 | return run |
177 | } |
178 | |
179 | // ---------------------------------------------------------------------------- |
180 | // FileRun |
181 | |
182 | // A Pak describes a Go package. |
183 | type Pak struct { |
184 | Path string // path of directory containing the package |
185 | Name string // package name as declared by package clause |
186 | } |
187 | |
188 | // Paks are sorted by name (primary key) and by import path (secondary key). |
189 | func (p *Pak) less(q *Pak) bool { |
190 | return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path |
191 | } |
192 | |
193 | // A File describes a Go file. |
194 | type File struct { |
195 | Name string // directory-local file name |
196 | Pak *Pak // the package to which the file belongs |
197 | } |
198 | |
199 | // Path returns the file path of f. |
200 | func (f *File) Path() string { |
201 | return pathpkg.Join(f.Pak.Path, f.Name) |
202 | } |
203 | |
204 | // A Spot describes a single occurrence of a word. |
205 | type Spot struct { |
206 | File *File |
207 | Info SpotInfo |
208 | } |
209 | |
210 | // A FileRun is a list of KindRuns belonging to the same file. |
211 | type FileRun struct { |
212 | File *File |
213 | Groups []KindRun |
214 | } |
215 | |
216 | // Spots are sorted by file path for the reduction into FileRuns. |
217 | func lessSpot(x, y interface{}) bool { |
218 | fx := x.(Spot).File |
219 | fy := y.(Spot).File |
220 | // same as "return fx.Path() < fy.Path()" but w/o computing the file path first |
221 | px := fx.Pak.Path |
222 | py := fy.Pak.Path |
223 | return px < py || px == py && fx.Name < fy.Name |
224 | } |
225 | |
226 | // newFileRun allocates a new FileRun from the Spot run h. |
227 | func newFileRun(h RunList) interface{} { |
228 | file := h[0].(Spot).File |
229 | |
230 | // reduce the list of Spots into a list of KindRuns |
231 | h1 := make(RunList, len(h)) |
232 | for i, x := range h { |
233 | h1[i] = x.(Spot).Info |
234 | } |
235 | h2 := h1.reduce(lessKind, newKindRun) |
236 | |
237 | // create the FileRun |
238 | groups := make([]KindRun, len(h2)) |
239 | for i, x := range h2 { |
240 | groups[i] = x.(KindRun) |
241 | } |
242 | return &FileRun{file, groups} |
243 | } |
244 | |
245 | // ---------------------------------------------------------------------------- |
246 | // PakRun |
247 | |
248 | // A PakRun describes a run of *FileRuns of a package. |
249 | type PakRun struct { |
250 | Pak *Pak |
251 | Files []*FileRun |
252 | } |
253 | |
254 | // Sorting support for files within a PakRun. |
255 | func (p *PakRun) Len() int { return len(p.Files) } |
256 | func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name } |
257 | func (p *PakRun) Swap(i, j int) { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] } |
258 | |
259 | // FileRuns are sorted by package for the reduction into PakRuns. |
260 | func lessFileRun(x, y interface{}) bool { |
261 | return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak) |
262 | } |
263 | |
264 | // newPakRun allocates a new PakRun from the *FileRun run h. |
265 | func newPakRun(h RunList) interface{} { |
266 | pak := h[0].(*FileRun).File.Pak |
267 | files := make([]*FileRun, len(h)) |
268 | for i, x := range h { |
269 | files[i] = x.(*FileRun) |
270 | } |
271 | run := &PakRun{pak, files} |
272 | sort.Sort(run) // files were sorted by package; sort them by file now |
273 | return run |
274 | } |
275 | |
276 | // ---------------------------------------------------------------------------- |
277 | // HitList |
278 | |
279 | // A HitList describes a list of PakRuns. |
280 | type HitList []*PakRun |
281 | |
282 | // PakRuns are sorted by package. |
283 | func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) } |
284 | |
285 | func reduce(h0 RunList) HitList { |
286 | // reduce a list of Spots into a list of FileRuns |
287 | h1 := h0.reduce(lessSpot, newFileRun) |
288 | // reduce a list of FileRuns into a list of PakRuns |
289 | h2 := h1.reduce(lessFileRun, newPakRun) |
290 | // sort the list of PakRuns by package |
291 | h2.sort(lessPakRun) |
292 | // create a HitList |
293 | h := make(HitList, len(h2)) |
294 | for i, p := range h2 { |
295 | h[i] = p.(*PakRun) |
296 | } |
297 | return h |
298 | } |
299 | |
300 | // filter returns a new HitList created by filtering |
301 | // all PakRuns from h that have a matching pakname. |
302 | func (h HitList) filter(pakname string) HitList { |
303 | var hh HitList |
304 | for _, p := range h { |
305 | if p.Pak.Name == pakname { |
306 | hh = append(hh, p) |
307 | } |
308 | } |
309 | return hh |
310 | } |
311 | |
312 | // ---------------------------------------------------------------------------- |
313 | // AltWords |
314 | |
315 | type wordPair struct { |
316 | canon string // canonical word spelling (all lowercase) |
317 | alt string // alternative spelling |
318 | } |
319 | |
320 | // An AltWords describes a list of alternative spellings for a |
321 | // canonical (all lowercase) spelling of a word. |
322 | type AltWords struct { |
323 | Canon string // canonical word spelling (all lowercase) |
324 | Alts []string // alternative spelling for the same word |
325 | } |
326 | |
327 | // wordPairs are sorted by their canonical spelling. |
328 | func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon } |
329 | |
330 | // newAltWords allocates a new AltWords from the *wordPair run h. |
331 | func newAltWords(h RunList) interface{} { |
332 | canon := h[0].(*wordPair).canon |
333 | alts := make([]string, len(h)) |
334 | for i, x := range h { |
335 | alts[i] = x.(*wordPair).alt |
336 | } |
337 | return &AltWords{canon, alts} |
338 | } |
339 | |
340 | func (a *AltWords) filter(s string) *AltWords { |
341 | var alts []string |
342 | for _, w := range a.Alts { |
343 | if w != s { |
344 | alts = append(alts, w) |
345 | } |
346 | } |
347 | if len(alts) > 0 { |
348 | return &AltWords{a.Canon, alts} |
349 | } |
350 | return nil |
351 | } |
352 | |
353 | // Ident stores information about external identifiers in order to create |
354 | // links to package documentation. |
355 | type Ident struct { |
356 | Path string // e.g. "net/http" |
357 | Package string // e.g. "http" |
358 | Name string // e.g. "NewRequest" |
359 | Doc string // e.g. "NewRequest returns a new Request..." |
360 | } |
361 | |
362 | // byImportCount sorts the given slice of Idents by the import |
363 | // counts of the packages to which they belong. |
364 | type byImportCount struct { |
365 | Idents []Ident |
366 | ImportCount map[string]int |
367 | } |
368 | |
369 | func (ic byImportCount) Len() int { |
370 | return len(ic.Idents) |
371 | } |
372 | |
373 | func (ic byImportCount) Less(i, j int) bool { |
374 | ri := ic.ImportCount[ic.Idents[i].Path] |
375 | rj := ic.ImportCount[ic.Idents[j].Path] |
376 | if ri == rj { |
377 | return ic.Idents[i].Path < ic.Idents[j].Path |
378 | } |
379 | return ri > rj |
380 | } |
381 | |
382 | func (ic byImportCount) Swap(i, j int) { |
383 | ic.Idents[i], ic.Idents[j] = ic.Idents[j], ic.Idents[i] |
384 | } |
385 | |
386 | func (ic byImportCount) String() string { |
387 | buf := bytes.NewBuffer([]byte("[")) |
388 | for _, v := range ic.Idents { |
389 | buf.WriteString(fmt.Sprintf("\n\t%s, %s (%d)", v.Path, v.Name, ic.ImportCount[v.Path])) |
390 | } |
391 | buf.WriteString("\n]") |
392 | return buf.String() |
393 | } |
394 | |
395 | // filter creates a new Ident list where the results match the given |
396 | // package name. |
397 | func (ic byImportCount) filter(pakname string) []Ident { |
398 | if ic.Idents == nil { |
399 | return nil |
400 | } |
401 | var res []Ident |
402 | for _, i := range ic.Idents { |
403 | if i.Package == pakname { |
404 | res = append(res, i) |
405 | } |
406 | } |
407 | return res |
408 | } |
409 | |
410 | // top returns the top n identifiers. |
411 | func (ic byImportCount) top(n int) []Ident { |
412 | if len(ic.Idents) > n { |
413 | return ic.Idents[:n] |
414 | } |
415 | return ic.Idents |
416 | } |
417 | |
418 | // ---------------------------------------------------------------------------- |
419 | // Indexer |
420 | |
421 | type IndexResult struct { |
422 | Decls RunList // package-level declarations (with snippets) |
423 | Others RunList // all other occurrences |
424 | } |
425 | |
426 | // Statistics provides statistics information for an index. |
427 | type Statistics struct { |
428 | Bytes int // total size of indexed source files |
429 | Files int // number of indexed source files |
430 | Lines int // number of lines (all files) |
431 | Words int // number of different identifiers |
432 | Spots int // number of identifier occurrences |
433 | } |
434 | |
435 | // An Indexer maintains the data structures and provides the machinery |
436 | // for indexing .go files under a file tree. It implements the path.Visitor |
437 | // interface for walking file trees, and the ast.Visitor interface for |
438 | // walking Go ASTs. |
439 | type Indexer struct { |
440 | c *Corpus |
441 | fset *token.FileSet // file set for all indexed files |
442 | fsOpenGate chan bool // send pre fs.Open; receive on close |
443 | |
444 | mu sync.Mutex // guards all the following |
445 | sources bytes.Buffer // concatenated sources |
446 | strings map[string]string // interned string |
447 | packages map[Pak]*Pak // interned *Paks |
448 | words map[string]*IndexResult // RunLists of Spots |
449 | snippets []*Snippet // indices are stored in SpotInfos |
450 | current *token.File // last file added to file set |
451 | file *File // AST for current file |
452 | decl ast.Decl // AST for current decl |
453 | stats Statistics |
454 | throttle *util.Throttle |
455 | importCount map[string]int // package path ("net/http") => count |
456 | packagePath map[string]map[string]bool // "template" => "text/template" => true |
457 | exports map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl |
458 | curPkgExports map[string]SpotKind |
459 | idents map[SpotKind]map[string][]Ident // kind => name => list of Idents |
460 | } |
461 | |
462 | func (x *Indexer) intern(s string) string { |
463 | if s, ok := x.strings[s]; ok { |
464 | return s |
465 | } |
466 | x.strings[s] = s |
467 | return s |
468 | } |
469 | |
470 | func (x *Indexer) lookupPackage(path, name string) *Pak { |
471 | // In the source directory tree, more than one package may |
472 | // live in the same directory. For the packages map, construct |
473 | // a key that includes both the directory path and the package |
474 | // name. |
475 | key := Pak{Path: x.intern(path), Name: x.intern(name)} |
476 | pak := x.packages[key] |
477 | if pak == nil { |
478 | pak = &key |
479 | x.packages[key] = pak |
480 | } |
481 | return pak |
482 | } |
483 | |
484 | func (x *Indexer) addSnippet(s *Snippet) int { |
485 | index := len(x.snippets) |
486 | x.snippets = append(x.snippets, s) |
487 | return index |
488 | } |
489 | |
490 | func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) { |
491 | if id == nil { |
492 | return |
493 | } |
494 | name := x.intern(id.Name) |
495 | |
496 | switch kind { |
497 | case TypeDecl, FuncDecl, ConstDecl, VarDecl: |
498 | x.curPkgExports[name] = kind |
499 | } |
500 | |
501 | lists, found := x.words[name] |
502 | if !found { |
503 | lists = new(IndexResult) |
504 | x.words[name] = lists |
505 | } |
506 | |
507 | if kind == Use || x.decl == nil { |
508 | if x.c.IndexGoCode { |
509 | // not a declaration or no snippet required |
510 | info := makeSpotInfo(kind, x.current.Line(id.Pos()), false) |
511 | lists.Others = append(lists.Others, Spot{x.file, info}) |
512 | } |
513 | } else { |
514 | // a declaration with snippet |
515 | index := x.addSnippet(NewSnippet(x.fset, x.decl, id)) |
516 | info := makeSpotInfo(kind, index, true) |
517 | lists.Decls = append(lists.Decls, Spot{x.file, info}) |
518 | } |
519 | |
520 | x.stats.Spots++ |
521 | } |
522 | |
523 | func (x *Indexer) visitFieldList(kind SpotKind, flist *ast.FieldList) { |
524 | for _, f := range flist.List { |
525 | x.decl = nil // no snippets for fields |
526 | for _, name := range f.Names { |
527 | x.visitIdent(kind, name) |
528 | } |
529 | ast.Walk(x, f.Type) |
530 | // ignore tag - not indexed at the moment |
531 | } |
532 | } |
533 | |
534 | func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) { |
535 | switch n := spec.(type) { |
536 | case *ast.ImportSpec: |
537 | x.visitIdent(ImportDecl, n.Name) |
538 | if n.Path != nil { |
539 | if imp, err := strconv.Unquote(n.Path.Value); err == nil { |
540 | x.importCount[x.intern(imp)]++ |
541 | } |
542 | } |
543 | |
544 | case *ast.ValueSpec: |
545 | for _, n := range n.Names { |
546 | x.visitIdent(kind, n) |
547 | } |
548 | ast.Walk(x, n.Type) |
549 | for _, v := range n.Values { |
550 | ast.Walk(x, v) |
551 | } |
552 | |
553 | case *ast.TypeSpec: |
554 | x.visitIdent(TypeDecl, n.Name) |
555 | ast.Walk(x, n.Type) |
556 | } |
557 | } |
558 | |
559 | func (x *Indexer) visitGenDecl(decl *ast.GenDecl) { |
560 | kind := VarDecl |
561 | if decl.Tok == token.CONST { |
562 | kind = ConstDecl |
563 | } |
564 | x.decl = decl |
565 | for _, s := range decl.Specs { |
566 | x.visitSpec(kind, s) |
567 | } |
568 | } |
569 | |
570 | func (x *Indexer) Visit(node ast.Node) ast.Visitor { |
571 | switch n := node.(type) { |
572 | case nil: |
573 | // nothing to do |
574 | |
575 | case *ast.Ident: |
576 | x.visitIdent(Use, n) |
577 | |
578 | case *ast.FieldList: |
579 | x.visitFieldList(VarDecl, n) |
580 | |
581 | case *ast.InterfaceType: |
582 | x.visitFieldList(MethodDecl, n.Methods) |
583 | |
584 | case *ast.DeclStmt: |
585 | // local declarations should only be *ast.GenDecls; |
586 | // ignore incorrect ASTs |
587 | if decl, ok := n.Decl.(*ast.GenDecl); ok { |
588 | x.decl = nil // no snippets for local declarations |
589 | x.visitGenDecl(decl) |
590 | } |
591 | |
592 | case *ast.GenDecl: |
593 | x.decl = n |
594 | x.visitGenDecl(n) |
595 | |
596 | case *ast.FuncDecl: |
597 | kind := FuncDecl |
598 | if n.Recv != nil { |
599 | kind = MethodDecl |
600 | ast.Walk(x, n.Recv) |
601 | } |
602 | x.decl = n |
603 | x.visitIdent(kind, n.Name) |
604 | ast.Walk(x, n.Type) |
605 | if n.Body != nil { |
606 | ast.Walk(x, n.Body) |
607 | } |
608 | |
609 | case *ast.File: |
610 | x.decl = nil |
611 | x.visitIdent(PackageClause, n.Name) |
612 | for _, d := range n.Decls { |
613 | ast.Walk(x, d) |
614 | } |
615 | |
616 | default: |
617 | return x |
618 | } |
619 | |
620 | return nil |
621 | } |
622 | |
623 | // addFile adds a file to the index if possible and returns the file set file |
624 | // and the file's AST if it was successfully parsed as a Go file. If addFile |
625 | // failed (that is, if the file was not added), it returns file == nil. |
626 | func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) { |
627 | defer f.Close() |
628 | |
629 | // The file set's base offset and x.sources size must be in lock-step; |
630 | // this permits the direct mapping of suffix array lookup results to |
631 | // to corresponding Pos values. |
632 | // |
633 | // When a file is added to the file set, its offset base increases by |
634 | // the size of the file + 1; and the initial base offset is 1. Add an |
635 | // extra byte to the sources here. |
636 | x.sources.WriteByte(0) |
637 | |
638 | // If the sources length doesn't match the file set base at this point |
639 | // the file set implementation changed or we have another error. |
640 | base := x.fset.Base() |
641 | if x.sources.Len() != base { |
642 | panic("internal error: file base incorrect") |
643 | } |
644 | |
645 | // append file contents (src) to x.sources |
646 | if _, err := x.sources.ReadFrom(f); err == nil { |
647 | src := x.sources.Bytes()[base:] |
648 | |
649 | if goFile { |
650 | // parse the file and in the process add it to the file set |
651 | if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil { |
652 | file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file |
653 | return |
654 | } |
655 | // file has parse errors, and the AST may be incorrect - |
656 | // set lines information explicitly and index as ordinary |
657 | // text file (cannot fall through to the text case below |
658 | // because the file has already been added to the file set |
659 | // by the parser) |
660 | file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file |
661 | file.SetLinesForContent(src) |
662 | ast = nil |
663 | return |
664 | } |
665 | |
666 | if util.IsText(src) { |
667 | // only add the file to the file set (for the full text index) |
668 | file = x.fset.AddFile(filename, x.fset.Base(), len(src)) |
669 | file.SetLinesForContent(src) |
670 | return |
671 | } |
672 | } |
673 | |
674 | // discard possibly added data |
675 | x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added |
676 | return |
677 | } |
678 | |
679 | // Design note: Using an explicit white list of permitted files for indexing |
680 | // makes sure that the important files are included and massively reduces the |
681 | // number of files to index. The advantage over a blacklist is that unexpected |
682 | // (non-blacklisted) files won't suddenly explode the index. |
683 | |
684 | // Files are whitelisted if they have a file name or extension |
685 | // present as key in whitelisted. |
686 | var whitelisted = map[string]bool{ |
687 | ".bash": true, |
688 | ".c": true, |
689 | ".cc": true, |
690 | ".cpp": true, |
691 | ".cxx": true, |
692 | ".css": true, |
693 | ".go": true, |
694 | ".goc": true, |
695 | ".h": true, |
696 | ".hh": true, |
697 | ".hpp": true, |
698 | ".hxx": true, |
699 | ".html": true, |
700 | ".js": true, |
701 | ".out": true, |
702 | ".py": true, |
703 | ".s": true, |
704 | ".sh": true, |
705 | ".txt": true, |
706 | ".xml": true, |
707 | "AUTHORS": true, |
708 | "CONTRIBUTORS": true, |
709 | "LICENSE": true, |
710 | "Makefile": true, |
711 | "PATENTS": true, |
712 | "README": true, |
713 | } |
714 | |
715 | // isWhitelisted returns true if a file is on the list |
716 | // of "permitted" files for indexing. The filename must |
717 | // be the directory-local name of the file. |
718 | func isWhitelisted(filename string) bool { |
719 | key := pathpkg.Ext(filename) |
720 | if key == "" { |
721 | // file has no extension - use entire filename |
722 | key = filename |
723 | } |
724 | return whitelisted[key] |
725 | } |
726 | |
727 | func (x *Indexer) indexDocs(dirname string, filename string, astFile *ast.File) { |
728 | pkgName := x.intern(astFile.Name.Name) |
729 | if pkgName == "main" { |
730 | return |
731 | } |
732 | pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/")) |
733 | astPkg := ast.Package{ |
734 | Name: pkgName, |
735 | Files: map[string]*ast.File{ |
736 | filename: astFile, |
737 | }, |
738 | } |
739 | var m doc.Mode |
740 | docPkg := doc.New(&astPkg, dirname, m) |
741 | addIdent := func(sk SpotKind, name string, docstr string) { |
742 | if x.idents[sk] == nil { |
743 | x.idents[sk] = make(map[string][]Ident) |
744 | } |
745 | name = x.intern(name) |
746 | x.idents[sk][name] = append(x.idents[sk][name], Ident{ |
747 | Path: pkgPath, |
748 | Package: pkgName, |
749 | Name: name, |
750 | Doc: doc.Synopsis(docstr), |
751 | }) |
752 | } |
753 | |
754 | if x.idents[PackageClause] == nil { |
755 | x.idents[PackageClause] = make(map[string][]Ident) |
756 | } |
757 | // List of words under which the package identifier will be stored. |
758 | // This includes the package name and the components of the directory |
759 | // in which it resides. |
760 | words := strings.Split(pathpkg.Dir(pkgPath), "/") |
761 | if words[0] == "." { |
762 | words = []string{} |
763 | } |
764 | name := x.intern(docPkg.Name) |
765 | synopsis := doc.Synopsis(docPkg.Doc) |
766 | words = append(words, name) |
767 | pkgIdent := Ident{ |
768 | Path: pkgPath, |
769 | Package: pkgName, |
770 | Name: name, |
771 | Doc: synopsis, |
772 | } |
773 | for _, word := range words { |
774 | word = x.intern(word) |
775 | found := false |
776 | pkgs := x.idents[PackageClause][word] |
777 | for i, p := range pkgs { |
778 | if p.Path == pkgPath { |
779 | if docPkg.Doc != "" { |
780 | p.Doc = synopsis |
781 | pkgs[i] = p |
782 | } |
783 | found = true |
784 | break |
785 | } |
786 | } |
787 | if !found { |
788 | x.idents[PackageClause][word] = append(x.idents[PackageClause][word], pkgIdent) |
789 | } |
790 | } |
791 | |
792 | for _, c := range docPkg.Consts { |
793 | for _, name := range c.Names { |
794 | addIdent(ConstDecl, name, c.Doc) |
795 | } |
796 | } |
797 | for _, t := range docPkg.Types { |
798 | addIdent(TypeDecl, t.Name, t.Doc) |
799 | for _, c := range t.Consts { |
800 | for _, name := range c.Names { |
801 | addIdent(ConstDecl, name, c.Doc) |
802 | } |
803 | } |
804 | for _, v := range t.Vars { |
805 | for _, name := range v.Names { |
806 | addIdent(VarDecl, name, v.Doc) |
807 | } |
808 | } |
809 | for _, f := range t.Funcs { |
810 | addIdent(FuncDecl, f.Name, f.Doc) |
811 | } |
812 | for _, f := range t.Methods { |
813 | addIdent(MethodDecl, f.Name, f.Doc) |
814 | // Change the name of methods to be "<typename>.<methodname>". |
815 | // They will still be indexed as <methodname>. |
816 | idents := x.idents[MethodDecl][f.Name] |
817 | idents[len(idents)-1].Name = x.intern(t.Name + "." + f.Name) |
818 | } |
819 | } |
820 | for _, v := range docPkg.Vars { |
821 | for _, name := range v.Names { |
822 | addIdent(VarDecl, name, v.Doc) |
823 | } |
824 | } |
825 | for _, f := range docPkg.Funcs { |
826 | addIdent(FuncDecl, f.Name, f.Doc) |
827 | } |
828 | } |
829 | |
830 | func (x *Indexer) indexGoFile(dirname string, filename string, file *token.File, astFile *ast.File) { |
831 | pkgName := astFile.Name.Name |
832 | |
833 | if x.c.IndexGoCode { |
834 | x.current = file |
835 | pak := x.lookupPackage(dirname, pkgName) |
836 | x.file = &File{filename, pak} |
837 | ast.Walk(x, astFile) |
838 | } |
839 | |
840 | if x.c.IndexDocs { |
841 | // Test files are already filtered out in visitFile if IndexGoCode and |
842 | // IndexFullText are false. Otherwise, check here. |
843 | isTestFile := (x.c.IndexGoCode || x.c.IndexFullText) && |
844 | (strings.HasSuffix(filename, "_test.go") || strings.HasPrefix(dirname, "/test/")) |
845 | if !isTestFile { |
846 | x.indexDocs(dirname, filename, astFile) |
847 | } |
848 | } |
849 | |
850 | ppKey := x.intern(pkgName) |
851 | if _, ok := x.packagePath[ppKey]; !ok { |
852 | x.packagePath[ppKey] = make(map[string]bool) |
853 | } |
854 | pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/")) |
855 | x.packagePath[ppKey][pkgPath] = true |
856 | |
857 | // Merge in exported symbols found walking this file into |
858 | // the map for that package. |
859 | if len(x.curPkgExports) > 0 { |
860 | dest, ok := x.exports[pkgPath] |
861 | if !ok { |
862 | dest = make(map[string]SpotKind) |
863 | x.exports[pkgPath] = dest |
864 | } |
865 | for k, v := range x.curPkgExports { |
866 | dest[k] = v |
867 | } |
868 | } |
869 | } |
870 | |
871 | func (x *Indexer) visitFile(dirname string, fi os.FileInfo) { |
872 | if fi.IsDir() || !x.c.IndexEnabled { |
873 | return |
874 | } |
875 | |
876 | filename := pathpkg.Join(dirname, fi.Name()) |
877 | goFile := isGoFile(fi) |
878 | |
879 | switch { |
880 | case x.c.IndexFullText: |
881 | if !isWhitelisted(fi.Name()) { |
882 | return |
883 | } |
884 | case x.c.IndexGoCode: |
885 | if !goFile { |
886 | return |
887 | } |
888 | case x.c.IndexDocs: |
889 | if !goFile || |
890 | strings.HasSuffix(fi.Name(), "_test.go") || |
891 | strings.HasPrefix(dirname, "/test/") { |
892 | return |
893 | } |
894 | default: |
895 | // No indexing turned on. |
896 | return |
897 | } |
898 | |
899 | x.fsOpenGate <- true |
900 | defer func() { <-x.fsOpenGate }() |
901 | |
902 | // open file |
903 | f, err := x.c.fs.Open(filename) |
904 | if err != nil { |
905 | return |
906 | } |
907 | |
908 | x.mu.Lock() |
909 | defer x.mu.Unlock() |
910 | |
911 | x.throttle.Throttle() |
912 | |
913 | x.curPkgExports = make(map[string]SpotKind) |
914 | file, fast := x.addFile(f, filename, goFile) |
915 | if file == nil { |
916 | return // addFile failed |
917 | } |
918 | |
919 | if fast != nil { |
920 | x.indexGoFile(dirname, fi.Name(), file, fast) |
921 | } |
922 | |
923 | // update statistics |
924 | x.stats.Bytes += file.Size() |
925 | x.stats.Files++ |
926 | x.stats.Lines += file.LineCount() |
927 | } |
928 | |
929 | // indexOptions contains information that affects the contents of an index. |
930 | type indexOptions struct { |
931 | // Docs provides documentation search results. |
932 | // It is only consulted if IndexEnabled is true. |
933 | // The default values is true. |
934 | Docs bool |
935 | |
936 | // GoCode provides Go source code search results. |
937 | // It is only consulted if IndexEnabled is true. |
938 | // The default values is true. |
939 | GoCode bool |
940 | |
941 | // FullText provides search results from all files. |
942 | // It is only consulted if IndexEnabled is true. |
943 | // The default values is true. |
944 | FullText bool |
945 | |
946 | // MaxResults optionally specifies the maximum results for indexing. |
947 | // The default is 1000. |
948 | MaxResults int |
949 | } |
950 | |
951 | // ---------------------------------------------------------------------------- |
952 | // Index |
953 | |
954 | type LookupResult struct { |
955 | Decls HitList // package-level declarations (with snippets) |
956 | Others HitList // all other occurrences |
957 | } |
958 | |
959 | type Index struct { |
960 | fset *token.FileSet // file set used during indexing; nil if no textindex |
961 | suffixes *suffixarray.Index // suffixes for concatenated sources; nil if no textindex |
962 | words map[string]*LookupResult // maps words to hit lists |
963 | alts map[string]*AltWords // maps canonical(words) to lists of alternative spellings |
964 | snippets []*Snippet // all snippets, indexed by snippet index |
965 | stats Statistics |
966 | importCount map[string]int // package path ("net/http") => count |
967 | packagePath map[string]map[string]bool // "template" => "text/template" => true |
968 | exports map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl |
969 | idents map[SpotKind]map[string][]Ident |
970 | opts indexOptions |
971 | } |
972 | |
973 | func canonical(w string) string { return strings.ToLower(w) } |
974 | |
975 | // Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems |
976 | // consuming file descriptors, where some systems have low 256 or 512 limits. |
977 | // Go should have a built-in way to cap fd usage under the ulimit. |
978 | const ( |
979 | maxOpenFiles = 200 |
980 | maxOpenDirs = 50 |
981 | ) |
982 | |
983 | func (c *Corpus) throttle() float64 { |
984 | if c.IndexThrottle <= 0 { |
985 | return 0.9 |
986 | } |
987 | if c.IndexThrottle > 1.0 { |
988 | return 1.0 |
989 | } |
990 | return c.IndexThrottle |
991 | } |
992 | |
993 | // NewIndex creates a new index for the .go files provided by the corpus. |
994 | func (c *Corpus) NewIndex() *Index { |
995 | // initialize Indexer |
996 | // (use some reasonably sized maps to start) |
997 | x := &Indexer{ |
998 | c: c, |
999 | fset: token.NewFileSet(), |
1000 | fsOpenGate: make(chan bool, maxOpenFiles), |
1001 | strings: make(map[string]string), |
1002 | packages: make(map[Pak]*Pak, 256), |
1003 | words: make(map[string]*IndexResult, 8192), |
1004 | throttle: util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time |
1005 | importCount: make(map[string]int), |
1006 | packagePath: make(map[string]map[string]bool), |
1007 | exports: make(map[string]map[string]SpotKind), |
1008 | idents: make(map[SpotKind]map[string][]Ident, 4), |
1009 | } |
1010 | |
1011 | // index all files in the directories given by dirnames |
1012 | var wg sync.WaitGroup // outstanding ReadDir + visitFile |
1013 | dirGate := make(chan bool, maxOpenDirs) |
1014 | for dirname := range c.fsDirnames() { |
1015 | if c.IndexDirectory != nil && !c.IndexDirectory(dirname) { |
1016 | continue |
1017 | } |
1018 | dirGate <- true |
1019 | wg.Add(1) |
1020 | go func(dirname string) { |
1021 | defer func() { <-dirGate }() |
1022 | defer wg.Done() |
1023 | |
1024 | list, err := c.fs.ReadDir(dirname) |
1025 | if err != nil { |
1026 | log.Printf("ReadDir(%q): %v; skipping directory", dirname, err) |
1027 | return // ignore this directory |
1028 | } |
1029 | for _, fi := range list { |
1030 | wg.Add(1) |
1031 | go func(fi os.FileInfo) { |
1032 | defer wg.Done() |
1033 | x.visitFile(dirname, fi) |
1034 | }(fi) |
1035 | } |
1036 | }(dirname) |
1037 | } |
1038 | wg.Wait() |
1039 | |
1040 | if !c.IndexFullText { |
1041 | // the file set, the current file, and the sources are |
1042 | // not needed after indexing if no text index is built - |
1043 | // help GC and clear them |
1044 | x.fset = nil |
1045 | x.sources.Reset() |
1046 | x.current = nil // contains reference to fset! |
1047 | } |
1048 | |
1049 | // for each word, reduce the RunLists into a LookupResult; |
1050 | // also collect the word with its canonical spelling in a |
1051 | // word list for later computation of alternative spellings |
1052 | words := make(map[string]*LookupResult) |
1053 | var wlist RunList |
1054 | for w, h := range x.words { |
1055 | decls := reduce(h.Decls) |
1056 | others := reduce(h.Others) |
1057 | words[w] = &LookupResult{ |
1058 | Decls: decls, |
1059 | Others: others, |
1060 | } |
1061 | wlist = append(wlist, &wordPair{canonical(w), w}) |
1062 | x.throttle.Throttle() |
1063 | } |
1064 | x.stats.Words = len(words) |
1065 | |
1066 | // reduce the word list {canonical(w), w} into |
1067 | // a list of AltWords runs {canonical(w), {w}} |
1068 | alist := wlist.reduce(lessWordPair, newAltWords) |
1069 | |
1070 | // convert alist into a map of alternative spellings |
1071 | alts := make(map[string]*AltWords) |
1072 | for i := 0; i < len(alist); i++ { |
1073 | a := alist[i].(*AltWords) |
1074 | alts[a.Canon] = a |
1075 | } |
1076 | |
1077 | // create text index |
1078 | var suffixes *suffixarray.Index |
1079 | if c.IndexFullText { |
1080 | suffixes = suffixarray.New(x.sources.Bytes()) |
1081 | } |
1082 | |
1083 | // sort idents by the number of imports of their respective packages |
1084 | for _, idMap := range x.idents { |
1085 | for _, ir := range idMap { |
1086 | sort.Sort(byImportCount{ir, x.importCount}) |
1087 | } |
1088 | } |
1089 | |
1090 | return &Index{ |
1091 | fset: x.fset, |
1092 | suffixes: suffixes, |
1093 | words: words, |
1094 | alts: alts, |
1095 | snippets: x.snippets, |
1096 | stats: x.stats, |
1097 | importCount: x.importCount, |
1098 | packagePath: x.packagePath, |
1099 | exports: x.exports, |
1100 | idents: x.idents, |
1101 | opts: indexOptions{ |
1102 | Docs: x.c.IndexDocs, |
1103 | GoCode: x.c.IndexGoCode, |
1104 | FullText: x.c.IndexFullText, |
1105 | MaxResults: x.c.MaxResults, |
1106 | }, |
1107 | } |
1108 | } |
1109 | |
1110 | var ErrFileIndexVersion = errors.New("file index version out of date") |
1111 | |
1112 | const fileIndexVersion = 3 |
1113 | |
1114 | // fileIndex is the subset of Index that's gob-encoded for use by |
1115 | // Index.Write and Index.Read. |
1116 | type fileIndex struct { |
1117 | Version int |
1118 | Words map[string]*LookupResult |
1119 | Alts map[string]*AltWords |
1120 | Snippets []*Snippet |
1121 | Fulltext bool |
1122 | Stats Statistics |
1123 | ImportCount map[string]int |
1124 | PackagePath map[string]map[string]bool |
1125 | Exports map[string]map[string]SpotKind |
1126 | Idents map[SpotKind]map[string][]Ident |
1127 | Opts indexOptions |
1128 | } |
1129 | |
1130 | func (x *fileIndex) Write(w io.Writer) error { |
1131 | return gob.NewEncoder(w).Encode(x) |
1132 | } |
1133 | |
1134 | func (x *fileIndex) Read(r io.Reader) error { |
1135 | return gob.NewDecoder(r).Decode(x) |
1136 | } |
1137 | |
1138 | // WriteTo writes the index x to w. |
1139 | func (x *Index) WriteTo(w io.Writer) (n int64, err error) { |
1140 | w = countingWriter{&n, w} |
1141 | fulltext := false |
1142 | if x.suffixes != nil { |
1143 | fulltext = true |
1144 | } |
1145 | fx := fileIndex{ |
1146 | Version: fileIndexVersion, |
1147 | Words: x.words, |
1148 | Alts: x.alts, |
1149 | Snippets: x.snippets, |
1150 | Fulltext: fulltext, |
1151 | Stats: x.stats, |
1152 | ImportCount: x.importCount, |
1153 | PackagePath: x.packagePath, |
1154 | Exports: x.exports, |
1155 | Idents: x.idents, |
1156 | Opts: x.opts, |
1157 | } |
1158 | if err := fx.Write(w); err != nil { |
1159 | return 0, err |
1160 | } |
1161 | if fulltext { |
1162 | encode := func(x interface{}) error { |
1163 | return gob.NewEncoder(w).Encode(x) |
1164 | } |
1165 | if err := x.fset.Write(encode); err != nil { |
1166 | return 0, err |
1167 | } |
1168 | if err := x.suffixes.Write(w); err != nil { |
1169 | return 0, err |
1170 | } |
1171 | } |
1172 | return n, nil |
1173 | } |
1174 | |
1175 | // ReadFrom reads the index from r into x; x must not be nil. |
1176 | // If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader. |
1177 | // If the index is from an old version, the error is ErrFileIndexVersion. |
1178 | func (x *Index) ReadFrom(r io.Reader) (n int64, err error) { |
1179 | // We use the ability to read bytes as a plausible surrogate for buffering. |
1180 | if _, ok := r.(io.ByteReader); !ok { |
1181 | r = bufio.NewReader(r) |
1182 | } |
1183 | r = countingReader{&n, r.(byteReader)} |
1184 | var fx fileIndex |
1185 | if err := fx.Read(r); err != nil { |
1186 | return n, err |
1187 | } |
1188 | if fx.Version != fileIndexVersion { |
1189 | return 0, ErrFileIndexVersion |
1190 | } |
1191 | x.words = fx.Words |
1192 | x.alts = fx.Alts |
1193 | x.snippets = fx.Snippets |
1194 | x.stats = fx.Stats |
1195 | x.importCount = fx.ImportCount |
1196 | x.packagePath = fx.PackagePath |
1197 | x.exports = fx.Exports |
1198 | x.idents = fx.Idents |
1199 | x.opts = fx.Opts |
1200 | if fx.Fulltext { |
1201 | x.fset = token.NewFileSet() |
1202 | decode := func(x interface{}) error { |
1203 | return gob.NewDecoder(r).Decode(x) |
1204 | } |
1205 | if err := x.fset.Read(decode); err != nil { |
1206 | return n, err |
1207 | } |
1208 | x.suffixes = new(suffixarray.Index) |
1209 | if err := x.suffixes.Read(r); err != nil { |
1210 | return n, err |
1211 | } |
1212 | } |
1213 | return n, nil |
1214 | } |
1215 | |
1216 | // Stats returns index statistics. |
1217 | func (x *Index) Stats() Statistics { |
1218 | return x.stats |
1219 | } |
1220 | |
1221 | // ImportCount returns a map from import paths to how many times they were seen. |
1222 | func (x *Index) ImportCount() map[string]int { |
1223 | return x.importCount |
1224 | } |
1225 | |
1226 | // PackagePath returns a map from short package name to a set |
1227 | // of full package path names that use that short package name. |
1228 | func (x *Index) PackagePath() map[string]map[string]bool { |
1229 | return x.packagePath |
1230 | } |
1231 | |
1232 | // Exports returns a map from full package path to exported |
1233 | // symbol name to its type. |
1234 | func (x *Index) Exports() map[string]map[string]SpotKind { |
1235 | return x.exports |
1236 | } |
1237 | |
1238 | // Idents returns a map from identifier type to exported |
1239 | // symbol name to the list of identifiers matching that name. |
1240 | func (x *Index) Idents() map[SpotKind]map[string][]Ident { |
1241 | return x.idents |
1242 | } |
1243 | |
1244 | func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) { |
1245 | match = x.words[w] |
1246 | alt = x.alts[canonical(w)] |
1247 | // remove current spelling from alternatives |
1248 | // (if there is no match, the alternatives do |
1249 | // not contain the current spelling) |
1250 | if match != nil && alt != nil { |
1251 | alt = alt.filter(w) |
1252 | } |
1253 | return |
1254 | } |
1255 | |
1256 | // isIdentifier reports whether s is a Go identifier. |
1257 | func isIdentifier(s string) bool { |
1258 | for i, ch := range s { |
1259 | if unicode.IsLetter(ch) || ch == '_' || i > 0 && unicode.IsDigit(ch) { |
1260 | continue |
1261 | } |
1262 | return false |
1263 | } |
1264 | return len(s) > 0 |
1265 | } |
1266 | |
1267 | // For a given query, which is either a single identifier or a qualified |
1268 | // identifier, Lookup returns a SearchResult containing packages, a LookupResult, a |
1269 | // list of alternative spellings, and identifiers, if any. Any and all results |
1270 | // may be nil. If the query syntax is wrong, an error is reported. |
1271 | func (x *Index) Lookup(query string) (*SearchResult, error) { |
1272 | ss := strings.Split(query, ".") |
1273 | |
1274 | // check query syntax |
1275 | for _, s := range ss { |
1276 | if !isIdentifier(s) { |
1277 | return nil, errors.New("all query parts must be identifiers") |
1278 | } |
1279 | } |
1280 | rslt := &SearchResult{ |
1281 | Query: query, |
1282 | Idents: make(map[SpotKind][]Ident, 5), |
1283 | } |
1284 | // handle simple and qualified identifiers |
1285 | switch len(ss) { |
1286 | case 1: |
1287 | ident := ss[0] |
1288 | rslt.Hit, rslt.Alt = x.lookupWord(ident) |
1289 | if rslt.Hit != nil { |
1290 | // found a match - filter packages with same name |
1291 | // for the list of packages called ident, if any |
1292 | rslt.Pak = rslt.Hit.Others.filter(ident) |
1293 | } |
1294 | for k, v := range x.idents { |
1295 | const rsltLimit = 50 |
1296 | ids := byImportCount{v[ident], x.importCount} |
1297 | rslt.Idents[k] = ids.top(rsltLimit) |
1298 | } |
1299 | |
1300 | case 2: |
1301 | pakname, ident := ss[0], ss[1] |
1302 | rslt.Hit, rslt.Alt = x.lookupWord(ident) |
1303 | if rslt.Hit != nil { |
1304 | // found a match - filter by package name |
1305 | // (no paks - package names are not qualified) |
1306 | decls := rslt.Hit.Decls.filter(pakname) |
1307 | others := rslt.Hit.Others.filter(pakname) |
1308 | rslt.Hit = &LookupResult{decls, others} |
1309 | } |
1310 | for k, v := range x.idents { |
1311 | ids := byImportCount{v[ident], x.importCount} |
1312 | rslt.Idents[k] = ids.filter(pakname) |
1313 | } |
1314 | |
1315 | default: |
1316 | return nil, errors.New("query is not a (qualified) identifier") |
1317 | } |
1318 | |
1319 | return rslt, nil |
1320 | } |
1321 | |
1322 | func (x *Index) Snippet(i int) *Snippet { |
1323 | // handle illegal snippet indices gracefully |
1324 | if 0 <= i && i < len(x.snippets) { |
1325 | return x.snippets[i] |
1326 | } |
1327 | return nil |
1328 | } |
1329 | |
1330 | type positionList []struct { |
1331 | filename string |
1332 | line int |
1333 | } |
1334 | |
1335 | func (list positionList) Len() int { return len(list) } |
1336 | func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename } |
1337 | func (list positionList) Swap(i, j int) { list[i], list[j] = list[j], list[i] } |
1338 | |
1339 | // unique returns the list sorted and with duplicate entries removed |
1340 | func unique(list []int) []int { |
1341 | sort.Ints(list) |
1342 | var last int |
1343 | i := 0 |
1344 | for _, x := range list { |
1345 | if i == 0 || x != last { |
1346 | last = x |
1347 | list[i] = x |
1348 | i++ |
1349 | } |
1350 | } |
1351 | return list[0:i] |
1352 | } |
1353 | |
1354 | // A FileLines value specifies a file and line numbers within that file. |
1355 | type FileLines struct { |
1356 | Filename string |
1357 | Lines []int |
1358 | } |
1359 | |
1360 | // LookupRegexp returns the number of matches and the matches where a regular |
1361 | // expression r is found in the full text index. At most n matches are |
1362 | // returned (thus found <= n). |
1363 | func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) { |
1364 | if x.suffixes == nil || n <= 0 { |
1365 | return |
1366 | } |
1367 | // n > 0 |
1368 | |
1369 | var list positionList |
1370 | // FindAllIndex may returns matches that span across file boundaries. |
1371 | // Such matches are unlikely, buf after eliminating them we may end up |
1372 | // with fewer than n matches. If we don't have enough at the end, redo |
1373 | // the search with an increased value n1, but only if FindAllIndex |
1374 | // returned all the requested matches in the first place (if it |
1375 | // returned fewer than that there cannot be more). |
1376 | for n1 := n; found < n; n1 += n - found { |
1377 | found = 0 |
1378 | matches := x.suffixes.FindAllIndex(r, n1) |
1379 | // compute files, exclude matches that span file boundaries, |
1380 | // and map offsets to file-local offsets |
1381 | list = make(positionList, len(matches)) |
1382 | for _, m := range matches { |
1383 | // by construction, an offset corresponds to the Pos value |
1384 | // for the file set - use it to get the file and line |
1385 | p := token.Pos(m[0]) |
1386 | if file := x.fset.File(p); file != nil { |
1387 | if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() { |
1388 | // match [m[0], m[1]) is within the file boundaries |
1389 | list[found].filename = file.Name() |
1390 | list[found].line = file.Line(p) |
1391 | found++ |
1392 | } |
1393 | } |
1394 | } |
1395 | if found == n || len(matches) < n1 { |
1396 | // found all matches or there's no chance to find more |
1397 | break |
1398 | } |
1399 | } |
1400 | list = list[0:found] |
1401 | sort.Sort(list) // sort by filename |
1402 | |
1403 | // collect matches belonging to the same file |
1404 | var last string |
1405 | var lines []int |
1406 | addLines := func() { |
1407 | if len(lines) > 0 { |
1408 | // remove duplicate lines |
1409 | result = append(result, FileLines{last, unique(lines)}) |
1410 | lines = nil |
1411 | } |
1412 | } |
1413 | for _, m := range list { |
1414 | if m.filename != last { |
1415 | addLines() |
1416 | last = m.filename |
1417 | } |
1418 | lines = append(lines, m.line) |
1419 | } |
1420 | addLines() |
1421 | |
1422 | return |
1423 | } |
1424 | |
1425 | // InvalidateIndex should be called whenever any of the file systems |
1426 | // under godoc's observation change so that the indexer is kicked on. |
1427 | func (c *Corpus) invalidateIndex() { |
1428 | c.fsModified.Set(nil) |
1429 | c.refreshMetadata() |
1430 | } |
1431 | |
1432 | // feedDirnames feeds the directory names of all directories |
1433 | // under the file system given by root to channel c. |
1434 | func (c *Corpus) feedDirnames(ch chan<- string) { |
1435 | if dir, _ := c.fsTree.Get(); dir != nil { |
1436 | for d := range dir.(*Directory).iter(false) { |
1437 | ch <- d.Path |
1438 | } |
1439 | } |
1440 | } |
1441 | |
1442 | // fsDirnames() returns a channel sending all directory names |
1443 | // of all the file systems under godoc's observation. |
1444 | func (c *Corpus) fsDirnames() <-chan string { |
1445 | ch := make(chan string, 256) // buffered for fewer context switches |
1446 | go func() { |
1447 | c.feedDirnames(ch) |
1448 | close(ch) |
1449 | }() |
1450 | return ch |
1451 | } |
1452 | |
1453 | // CompatibleWith reports whether the Index x is compatible with the corpus |
1454 | // indexing options set in c. |
1455 | func (x *Index) CompatibleWith(c *Corpus) bool { |
1456 | return x.opts.Docs == c.IndexDocs && |
1457 | x.opts.GoCode == c.IndexGoCode && |
1458 | x.opts.FullText == c.IndexFullText && |
1459 | x.opts.MaxResults == c.MaxResults |
1460 | } |
1461 | |
1462 | func (c *Corpus) readIndex(filenames string) error { |
1463 | matches, err := filepath.Glob(filenames) |
1464 | if err != nil { |
1465 | return err |
1466 | } else if matches == nil { |
1467 | return fmt.Errorf("no index files match %q", filenames) |
1468 | } |
1469 | sort.Strings(matches) // make sure files are in the right order |
1470 | files := make([]io.Reader, 0, len(matches)) |
1471 | for _, filename := range matches { |
1472 | f, err := os.Open(filename) |
1473 | if err != nil { |
1474 | return err |
1475 | } |
1476 | defer f.Close() |
1477 | files = append(files, f) |
1478 | } |
1479 | return c.ReadIndexFrom(io.MultiReader(files...)) |
1480 | } |
1481 | |
1482 | // ReadIndexFrom sets the current index from the serialized version found in r. |
1483 | func (c *Corpus) ReadIndexFrom(r io.Reader) error { |
1484 | x := new(Index) |
1485 | if _, err := x.ReadFrom(r); err != nil { |
1486 | return err |
1487 | } |
1488 | if !x.CompatibleWith(c) { |
1489 | return fmt.Errorf("index file options are incompatible: %v", x.opts) |
1490 | } |
1491 | c.searchIndex.Set(x) |
1492 | return nil |
1493 | } |
1494 | |
1495 | func (c *Corpus) UpdateIndex() { |
1496 | if c.Verbose { |
1497 | log.Printf("updating index...") |
1498 | } |
1499 | start := time.Now() |
1500 | index := c.NewIndex() |
1501 | stop := time.Now() |
1502 | c.searchIndex.Set(index) |
1503 | if c.Verbose { |
1504 | secs := stop.Sub(start).Seconds() |
1505 | stats := index.Stats() |
1506 | log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)", |
1507 | secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots) |
1508 | } |
1509 | memstats := new(runtime.MemStats) |
1510 | runtime.ReadMemStats(memstats) |
1511 | if c.Verbose { |
1512 | log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) |
1513 | } |
1514 | runtime.GC() |
1515 | runtime.ReadMemStats(memstats) |
1516 | if c.Verbose { |
1517 | log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) |
1518 | } |
1519 | } |
1520 | |
1521 | // RunIndexer runs forever, indexing. |
1522 | func (c *Corpus) RunIndexer() { |
1523 | // initialize the index from disk if possible |
1524 | if c.IndexFiles != "" { |
1525 | c.initFSTree() |
1526 | if err := c.readIndex(c.IndexFiles); err != nil { |
1527 | log.Printf("error reading index from file %s: %v", c.IndexFiles, err) |
1528 | } |
1529 | return |
1530 | } |
1531 | |
1532 | // Repeatedly update the package directory tree and index. |
1533 | for { |
1534 | c.initFSTree() |
1535 | c.UpdateIndex() |
1536 | if c.IndexInterval < 0 { |
1537 | return |
1538 | } |
1539 | delay := 5 * time.Minute // by default, reindex every 5 minutes |
1540 | if c.IndexInterval > 0 { |
1541 | delay = c.IndexInterval |
1542 | } |
1543 | time.Sleep(delay) |
1544 | } |
1545 | } |
1546 | |
1547 | type countingWriter struct { |
1548 | n *int64 |
1549 | w io.Writer |
1550 | } |
1551 | |
1552 | func (c countingWriter) Write(p []byte) (n int, err error) { |
1553 | n, err = c.w.Write(p) |
1554 | *c.n += int64(n) |
1555 | return |
1556 | } |
1557 | |
1558 | type byteReader interface { |
1559 | io.Reader |
1560 | io.ByteReader |
1561 | } |
1562 | |
1563 | type countingReader struct { |
1564 | n *int64 |
1565 | r byteReader |
1566 | } |
1567 | |
1568 | func (c countingReader) Read(p []byte) (n int, err error) { |
1569 | n, err = c.r.Read(p) |
1570 | *c.n += int64(n) |
1571 | return |
1572 | } |
1573 | |
1574 | func (c countingReader) ReadByte() (b byte, err error) { |
1575 | b, err = c.r.ReadByte() |
1576 | *c.n += 1 |
1577 | return |
1578 | } |
1579 |
Members