| 1 | // Copyright 2009 The Go Authors. All rights reserved. |
|---|---|
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // This file contains the infrastructure to create an |
| 6 | // identifier and full-text index for a set of Go files. |
| 7 | // |
| 8 | // Algorithm for identifier index: |
| 9 | // - traverse all .go files of the file tree specified by root |
| 10 | // - for each identifier (word) encountered, collect all occurrences (spots) |
| 11 | // into a list; this produces a list of spots for each word |
| 12 | // - reduce the lists: from a list of spots to a list of FileRuns, |
| 13 | // and from a list of FileRuns into a list of PakRuns |
| 14 | // - make a HitList from the PakRuns |
| 15 | // |
| 16 | // Details: |
| 17 | // - keep two lists per word: one containing package-level declarations |
| 18 | // that have snippets, and one containing all other spots |
| 19 | // - keep the snippets in a separate table indexed by snippet index |
| 20 | // and store the snippet index in place of the line number in a SpotInfo |
| 21 | // (the line number for spots with snippets is stored in the snippet) |
| 22 | // - at the end, create lists of alternative spellings for a given |
| 23 | // word |
| 24 | // |
| 25 | // Algorithm for full text index: |
| 26 | // - concatenate all source code in a byte buffer (in memory) |
| 27 | // - add the files to a file set in lockstep as they are added to the byte |
| 28 | // buffer such that a byte buffer offset corresponds to the Pos value for |
| 29 | // that file location |
| 30 | // - create a suffix array from the concatenated sources |
| 31 | // |
| 32 | // String lookup in full text index: |
| 33 | // - use the suffix array to lookup a string's offsets - the offsets |
| 34 | // correspond to the Pos values relative to the file set |
| 35 | // - translate the Pos values back into file and line information and |
| 36 | // sort the result |
| 37 | |
| 38 | package godoc |
| 39 | |
| 40 | import ( |
| 41 | "bufio" |
| 42 | "bytes" |
| 43 | "encoding/gob" |
| 44 | "errors" |
| 45 | "fmt" |
| 46 | "go/ast" |
| 47 | "go/doc" |
| 48 | "go/parser" |
| 49 | "go/token" |
| 50 | "index/suffixarray" |
| 51 | "io" |
| 52 | "log" |
| 53 | "math" |
| 54 | "os" |
| 55 | pathpkg "path" |
| 56 | "path/filepath" |
| 57 | "regexp" |
| 58 | "runtime" |
| 59 | "sort" |
| 60 | "strconv" |
| 61 | "strings" |
| 62 | "sync" |
| 63 | "time" |
| 64 | "unicode" |
| 65 | |
| 66 | "golang.org/x/tools/godoc/util" |
| 67 | "golang.org/x/tools/godoc/vfs" |
| 68 | ) |
| 69 | |
| 70 | // ---------------------------------------------------------------------------- |
| 71 | // InterfaceSlice is a helper type for sorting interface |
| 72 | // slices according to some slice-specific sort criteria. |
| 73 | |
| 74 | type comparer func(x, y interface{}) bool |
| 75 | |
| 76 | type interfaceSlice struct { |
| 77 | slice []interface{} |
| 78 | less comparer |
| 79 | } |
| 80 | |
| 81 | // ---------------------------------------------------------------------------- |
| 82 | // RunList |
| 83 | |
| 84 | // A RunList is a list of entries that can be sorted according to some |
| 85 | // criteria. A RunList may be compressed by grouping "runs" of entries |
| 86 | // which are equal (according to the sort criteria) into a new RunList of |
| 87 | // runs. For instance, a RunList containing pairs (x, y) may be compressed |
| 88 | // into a RunList containing pair runs (x, {y}) where each run consists of |
| 89 | // a list of y's with the same x. |
| 90 | type RunList []interface{} |
| 91 | |
| 92 | func (h RunList) sort(less comparer) { |
| 93 | sort.Sort(&interfaceSlice{h, less}) |
| 94 | } |
| 95 | |
| 96 | func (p *interfaceSlice) Len() int { return len(p.slice) } |
| 97 | func (p *interfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) } |
| 98 | func (p *interfaceSlice) Swap(i, j int) { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] } |
| 99 | |
| 100 | // Compress entries which are the same according to a sort criteria |
| 101 | // (specified by less) into "runs". |
| 102 | func (h RunList) reduce(less comparer, newRun func(h RunList) interface{}) RunList { |
| 103 | if len(h) == 0 { |
| 104 | return nil |
| 105 | } |
| 106 | // len(h) > 0 |
| 107 | |
| 108 | // create runs of entries with equal values |
| 109 | h.sort(less) |
| 110 | |
| 111 | // for each run, make a new run object and collect them in a new RunList |
| 112 | var hh RunList |
| 113 | i, x := 0, h[0] |
| 114 | for j, y := range h { |
| 115 | if less(x, y) { |
| 116 | hh = append(hh, newRun(h[i:j])) |
| 117 | i, x = j, h[j] // start a new run |
| 118 | } |
| 119 | } |
| 120 | // add final run, if any |
| 121 | if i < len(h) { |
| 122 | hh = append(hh, newRun(h[i:])) |
| 123 | } |
| 124 | |
| 125 | return hh |
| 126 | } |
| 127 | |
| 128 | // ---------------------------------------------------------------------------- |
| 129 | // KindRun |
| 130 | |
| 131 | // Debugging support. Disable to see multiple entries per line. |
| 132 | const removeDuplicates = true |
| 133 | |
| 134 | // A KindRun is a run of SpotInfos of the same kind in a given file. |
| 135 | // The kind (3 bits) is stored in each SpotInfo element; to find the |
| 136 | // kind of a KindRun, look at any of its elements. |
| 137 | type KindRun []SpotInfo |
| 138 | |
| 139 | // KindRuns are sorted by line number or index. Since the isIndex bit |
| 140 | // is always the same for all infos in one list we can compare lori's. |
| 141 | func (k KindRun) Len() int { return len(k) } |
| 142 | func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() } |
| 143 | func (k KindRun) Swap(i, j int) { k[i], k[j] = k[j], k[i] } |
| 144 | |
| 145 | // FileRun contents are sorted by Kind for the reduction into KindRuns. |
| 146 | func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() } |
| 147 | |
| 148 | // newKindRun allocates a new KindRun from the SpotInfo run h. |
| 149 | func newKindRun(h RunList) interface{} { |
| 150 | run := make(KindRun, len(h)) |
| 151 | for i, x := range h { |
| 152 | run[i] = x.(SpotInfo) |
| 153 | } |
| 154 | |
| 155 | // Spots were sorted by file and kind to create this run. |
| 156 | // Within this run, sort them by line number or index. |
| 157 | sort.Sort(run) |
| 158 | |
| 159 | if removeDuplicates { |
| 160 | // Since both the lori and kind field must be |
| 161 | // same for duplicates, and since the isIndex |
| 162 | // bit is always the same for all infos in one |
| 163 | // list we can simply compare the entire info. |
| 164 | k := 0 |
| 165 | prev := SpotInfo(math.MaxUint32) // an unlikely value |
| 166 | for _, x := range run { |
| 167 | if x != prev { |
| 168 | run[k] = x |
| 169 | k++ |
| 170 | prev = x |
| 171 | } |
| 172 | } |
| 173 | run = run[0:k] |
| 174 | } |
| 175 | |
| 176 | return run |
| 177 | } |
| 178 | |
| 179 | // ---------------------------------------------------------------------------- |
| 180 | // FileRun |
| 181 | |
| 182 | // A Pak describes a Go package. |
| 183 | type Pak struct { |
| 184 | Path string // path of directory containing the package |
| 185 | Name string // package name as declared by package clause |
| 186 | } |
| 187 | |
| 188 | // Paks are sorted by name (primary key) and by import path (secondary key). |
| 189 | func (p *Pak) less(q *Pak) bool { |
| 190 | return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path |
| 191 | } |
| 192 | |
| 193 | // A File describes a Go file. |
| 194 | type File struct { |
| 195 | Name string // directory-local file name |
| 196 | Pak *Pak // the package to which the file belongs |
| 197 | } |
| 198 | |
| 199 | // Path returns the file path of f. |
| 200 | func (f *File) Path() string { |
| 201 | return pathpkg.Join(f.Pak.Path, f.Name) |
| 202 | } |
| 203 | |
| 204 | // A Spot describes a single occurrence of a word. |
| 205 | type Spot struct { |
| 206 | File *File |
| 207 | Info SpotInfo |
| 208 | } |
| 209 | |
| 210 | // A FileRun is a list of KindRuns belonging to the same file. |
| 211 | type FileRun struct { |
| 212 | File *File |
| 213 | Groups []KindRun |
| 214 | } |
| 215 | |
| 216 | // Spots are sorted by file path for the reduction into FileRuns. |
| 217 | func lessSpot(x, y interface{}) bool { |
| 218 | fx := x.(Spot).File |
| 219 | fy := y.(Spot).File |
| 220 | // same as "return fx.Path() < fy.Path()" but w/o computing the file path first |
| 221 | px := fx.Pak.Path |
| 222 | py := fy.Pak.Path |
| 223 | return px < py || px == py && fx.Name < fy.Name |
| 224 | } |
| 225 | |
| 226 | // newFileRun allocates a new FileRun from the Spot run h. |
| 227 | func newFileRun(h RunList) interface{} { |
| 228 | file := h[0].(Spot).File |
| 229 | |
| 230 | // reduce the list of Spots into a list of KindRuns |
| 231 | h1 := make(RunList, len(h)) |
| 232 | for i, x := range h { |
| 233 | h1[i] = x.(Spot).Info |
| 234 | } |
| 235 | h2 := h1.reduce(lessKind, newKindRun) |
| 236 | |
| 237 | // create the FileRun |
| 238 | groups := make([]KindRun, len(h2)) |
| 239 | for i, x := range h2 { |
| 240 | groups[i] = x.(KindRun) |
| 241 | } |
| 242 | return &FileRun{file, groups} |
| 243 | } |
| 244 | |
| 245 | // ---------------------------------------------------------------------------- |
| 246 | // PakRun |
| 247 | |
| 248 | // A PakRun describes a run of *FileRuns of a package. |
| 249 | type PakRun struct { |
| 250 | Pak *Pak |
| 251 | Files []*FileRun |
| 252 | } |
| 253 | |
| 254 | // Sorting support for files within a PakRun. |
| 255 | func (p *PakRun) Len() int { return len(p.Files) } |
| 256 | func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name } |
| 257 | func (p *PakRun) Swap(i, j int) { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] } |
| 258 | |
| 259 | // FileRuns are sorted by package for the reduction into PakRuns. |
| 260 | func lessFileRun(x, y interface{}) bool { |
| 261 | return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak) |
| 262 | } |
| 263 | |
| 264 | // newPakRun allocates a new PakRun from the *FileRun run h. |
| 265 | func newPakRun(h RunList) interface{} { |
| 266 | pak := h[0].(*FileRun).File.Pak |
| 267 | files := make([]*FileRun, len(h)) |
| 268 | for i, x := range h { |
| 269 | files[i] = x.(*FileRun) |
| 270 | } |
| 271 | run := &PakRun{pak, files} |
| 272 | sort.Sort(run) // files were sorted by package; sort them by file now |
| 273 | return run |
| 274 | } |
| 275 | |
| 276 | // ---------------------------------------------------------------------------- |
| 277 | // HitList |
| 278 | |
| 279 | // A HitList describes a list of PakRuns. |
| 280 | type HitList []*PakRun |
| 281 | |
| 282 | // PakRuns are sorted by package. |
| 283 | func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) } |
| 284 | |
| 285 | func reduce(h0 RunList) HitList { |
| 286 | // reduce a list of Spots into a list of FileRuns |
| 287 | h1 := h0.reduce(lessSpot, newFileRun) |
| 288 | // reduce a list of FileRuns into a list of PakRuns |
| 289 | h2 := h1.reduce(lessFileRun, newPakRun) |
| 290 | // sort the list of PakRuns by package |
| 291 | h2.sort(lessPakRun) |
| 292 | // create a HitList |
| 293 | h := make(HitList, len(h2)) |
| 294 | for i, p := range h2 { |
| 295 | h[i] = p.(*PakRun) |
| 296 | } |
| 297 | return h |
| 298 | } |
| 299 | |
| 300 | // filter returns a new HitList created by filtering |
| 301 | // all PakRuns from h that have a matching pakname. |
| 302 | func (h HitList) filter(pakname string) HitList { |
| 303 | var hh HitList |
| 304 | for _, p := range h { |
| 305 | if p.Pak.Name == pakname { |
| 306 | hh = append(hh, p) |
| 307 | } |
| 308 | } |
| 309 | return hh |
| 310 | } |
| 311 | |
| 312 | // ---------------------------------------------------------------------------- |
| 313 | // AltWords |
| 314 | |
| 315 | type wordPair struct { |
| 316 | canon string // canonical word spelling (all lowercase) |
| 317 | alt string // alternative spelling |
| 318 | } |
| 319 | |
| 320 | // An AltWords describes a list of alternative spellings for a |
| 321 | // canonical (all lowercase) spelling of a word. |
| 322 | type AltWords struct { |
| 323 | Canon string // canonical word spelling (all lowercase) |
| 324 | Alts []string // alternative spelling for the same word |
| 325 | } |
| 326 | |
| 327 | // wordPairs are sorted by their canonical spelling. |
| 328 | func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon } |
| 329 | |
| 330 | // newAltWords allocates a new AltWords from the *wordPair run h. |
| 331 | func newAltWords(h RunList) interface{} { |
| 332 | canon := h[0].(*wordPair).canon |
| 333 | alts := make([]string, len(h)) |
| 334 | for i, x := range h { |
| 335 | alts[i] = x.(*wordPair).alt |
| 336 | } |
| 337 | return &AltWords{canon, alts} |
| 338 | } |
| 339 | |
| 340 | func (a *AltWords) filter(s string) *AltWords { |
| 341 | var alts []string |
| 342 | for _, w := range a.Alts { |
| 343 | if w != s { |
| 344 | alts = append(alts, w) |
| 345 | } |
| 346 | } |
| 347 | if len(alts) > 0 { |
| 348 | return &AltWords{a.Canon, alts} |
| 349 | } |
| 350 | return nil |
| 351 | } |
| 352 | |
| 353 | // Ident stores information about external identifiers in order to create |
| 354 | // links to package documentation. |
| 355 | type Ident struct { |
| 356 | Path string // e.g. "net/http" |
| 357 | Package string // e.g. "http" |
| 358 | Name string // e.g. "NewRequest" |
| 359 | Doc string // e.g. "NewRequest returns a new Request..." |
| 360 | } |
| 361 | |
| 362 | // byImportCount sorts the given slice of Idents by the import |
| 363 | // counts of the packages to which they belong. |
| 364 | type byImportCount struct { |
| 365 | Idents []Ident |
| 366 | ImportCount map[string]int |
| 367 | } |
| 368 | |
| 369 | func (ic byImportCount) Len() int { |
| 370 | return len(ic.Idents) |
| 371 | } |
| 372 | |
| 373 | func (ic byImportCount) Less(i, j int) bool { |
| 374 | ri := ic.ImportCount[ic.Idents[i].Path] |
| 375 | rj := ic.ImportCount[ic.Idents[j].Path] |
| 376 | if ri == rj { |
| 377 | return ic.Idents[i].Path < ic.Idents[j].Path |
| 378 | } |
| 379 | return ri > rj |
| 380 | } |
| 381 | |
| 382 | func (ic byImportCount) Swap(i, j int) { |
| 383 | ic.Idents[i], ic.Idents[j] = ic.Idents[j], ic.Idents[i] |
| 384 | } |
| 385 | |
| 386 | func (ic byImportCount) String() string { |
| 387 | buf := bytes.NewBuffer([]byte("[")) |
| 388 | for _, v := range ic.Idents { |
| 389 | buf.WriteString(fmt.Sprintf("\n\t%s, %s (%d)", v.Path, v.Name, ic.ImportCount[v.Path])) |
| 390 | } |
| 391 | buf.WriteString("\n]") |
| 392 | return buf.String() |
| 393 | } |
| 394 | |
| 395 | // filter creates a new Ident list where the results match the given |
| 396 | // package name. |
| 397 | func (ic byImportCount) filter(pakname string) []Ident { |
| 398 | if ic.Idents == nil { |
| 399 | return nil |
| 400 | } |
| 401 | var res []Ident |
| 402 | for _, i := range ic.Idents { |
| 403 | if i.Package == pakname { |
| 404 | res = append(res, i) |
| 405 | } |
| 406 | } |
| 407 | return res |
| 408 | } |
| 409 | |
| 410 | // top returns the top n identifiers. |
| 411 | func (ic byImportCount) top(n int) []Ident { |
| 412 | if len(ic.Idents) > n { |
| 413 | return ic.Idents[:n] |
| 414 | } |
| 415 | return ic.Idents |
| 416 | } |
| 417 | |
| 418 | // ---------------------------------------------------------------------------- |
| 419 | // Indexer |
| 420 | |
| 421 | type IndexResult struct { |
| 422 | Decls RunList // package-level declarations (with snippets) |
| 423 | Others RunList // all other occurrences |
| 424 | } |
| 425 | |
| 426 | // Statistics provides statistics information for an index. |
| 427 | type Statistics struct { |
| 428 | Bytes int // total size of indexed source files |
| 429 | Files int // number of indexed source files |
| 430 | Lines int // number of lines (all files) |
| 431 | Words int // number of different identifiers |
| 432 | Spots int // number of identifier occurrences |
| 433 | } |
| 434 | |
| 435 | // An Indexer maintains the data structures and provides the machinery |
| 436 | // for indexing .go files under a file tree. It implements the path.Visitor |
| 437 | // interface for walking file trees, and the ast.Visitor interface for |
| 438 | // walking Go ASTs. |
| 439 | type Indexer struct { |
| 440 | c *Corpus |
| 441 | fset *token.FileSet // file set for all indexed files |
| 442 | fsOpenGate chan bool // send pre fs.Open; receive on close |
| 443 | |
| 444 | mu sync.Mutex // guards all the following |
| 445 | sources bytes.Buffer // concatenated sources |
| 446 | strings map[string]string // interned string |
| 447 | packages map[Pak]*Pak // interned *Paks |
| 448 | words map[string]*IndexResult // RunLists of Spots |
| 449 | snippets []*Snippet // indices are stored in SpotInfos |
| 450 | current *token.File // last file added to file set |
| 451 | file *File // AST for current file |
| 452 | decl ast.Decl // AST for current decl |
| 453 | stats Statistics |
| 454 | throttle *util.Throttle |
| 455 | importCount map[string]int // package path ("net/http") => count |
| 456 | packagePath map[string]map[string]bool // "template" => "text/template" => true |
| 457 | exports map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl |
| 458 | curPkgExports map[string]SpotKind |
| 459 | idents map[SpotKind]map[string][]Ident // kind => name => list of Idents |
| 460 | } |
| 461 | |
| 462 | func (x *Indexer) intern(s string) string { |
| 463 | if s, ok := x.strings[s]; ok { |
| 464 | return s |
| 465 | } |
| 466 | x.strings[s] = s |
| 467 | return s |
| 468 | } |
| 469 | |
| 470 | func (x *Indexer) lookupPackage(path, name string) *Pak { |
| 471 | // In the source directory tree, more than one package may |
| 472 | // live in the same directory. For the packages map, construct |
| 473 | // a key that includes both the directory path and the package |
| 474 | // name. |
| 475 | key := Pak{Path: x.intern(path), Name: x.intern(name)} |
| 476 | pak := x.packages[key] |
| 477 | if pak == nil { |
| 478 | pak = &key |
| 479 | x.packages[key] = pak |
| 480 | } |
| 481 | return pak |
| 482 | } |
| 483 | |
| 484 | func (x *Indexer) addSnippet(s *Snippet) int { |
| 485 | index := len(x.snippets) |
| 486 | x.snippets = append(x.snippets, s) |
| 487 | return index |
| 488 | } |
| 489 | |
| 490 | func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) { |
| 491 | if id == nil { |
| 492 | return |
| 493 | } |
| 494 | name := x.intern(id.Name) |
| 495 | |
| 496 | switch kind { |
| 497 | case TypeDecl, FuncDecl, ConstDecl, VarDecl: |
| 498 | x.curPkgExports[name] = kind |
| 499 | } |
| 500 | |
| 501 | lists, found := x.words[name] |
| 502 | if !found { |
| 503 | lists = new(IndexResult) |
| 504 | x.words[name] = lists |
| 505 | } |
| 506 | |
| 507 | if kind == Use || x.decl == nil { |
| 508 | if x.c.IndexGoCode { |
| 509 | // not a declaration or no snippet required |
| 510 | info := makeSpotInfo(kind, x.current.Line(id.Pos()), false) |
| 511 | lists.Others = append(lists.Others, Spot{x.file, info}) |
| 512 | } |
| 513 | } else { |
| 514 | // a declaration with snippet |
| 515 | index := x.addSnippet(NewSnippet(x.fset, x.decl, id)) |
| 516 | info := makeSpotInfo(kind, index, true) |
| 517 | lists.Decls = append(lists.Decls, Spot{x.file, info}) |
| 518 | } |
| 519 | |
| 520 | x.stats.Spots++ |
| 521 | } |
| 522 | |
| 523 | func (x *Indexer) visitFieldList(kind SpotKind, flist *ast.FieldList) { |
| 524 | for _, f := range flist.List { |
| 525 | x.decl = nil // no snippets for fields |
| 526 | for _, name := range f.Names { |
| 527 | x.visitIdent(kind, name) |
| 528 | } |
| 529 | ast.Walk(x, f.Type) |
| 530 | // ignore tag - not indexed at the moment |
| 531 | } |
| 532 | } |
| 533 | |
| 534 | func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) { |
| 535 | switch n := spec.(type) { |
| 536 | case *ast.ImportSpec: |
| 537 | x.visitIdent(ImportDecl, n.Name) |
| 538 | if n.Path != nil { |
| 539 | if imp, err := strconv.Unquote(n.Path.Value); err == nil { |
| 540 | x.importCount[x.intern(imp)]++ |
| 541 | } |
| 542 | } |
| 543 | |
| 544 | case *ast.ValueSpec: |
| 545 | for _, n := range n.Names { |
| 546 | x.visitIdent(kind, n) |
| 547 | } |
| 548 | ast.Walk(x, n.Type) |
| 549 | for _, v := range n.Values { |
| 550 | ast.Walk(x, v) |
| 551 | } |
| 552 | |
| 553 | case *ast.TypeSpec: |
| 554 | x.visitIdent(TypeDecl, n.Name) |
| 555 | ast.Walk(x, n.Type) |
| 556 | } |
| 557 | } |
| 558 | |
| 559 | func (x *Indexer) visitGenDecl(decl *ast.GenDecl) { |
| 560 | kind := VarDecl |
| 561 | if decl.Tok == token.CONST { |
| 562 | kind = ConstDecl |
| 563 | } |
| 564 | x.decl = decl |
| 565 | for _, s := range decl.Specs { |
| 566 | x.visitSpec(kind, s) |
| 567 | } |
| 568 | } |
| 569 | |
| 570 | func (x *Indexer) Visit(node ast.Node) ast.Visitor { |
| 571 | switch n := node.(type) { |
| 572 | case nil: |
| 573 | // nothing to do |
| 574 | |
| 575 | case *ast.Ident: |
| 576 | x.visitIdent(Use, n) |
| 577 | |
| 578 | case *ast.FieldList: |
| 579 | x.visitFieldList(VarDecl, n) |
| 580 | |
| 581 | case *ast.InterfaceType: |
| 582 | x.visitFieldList(MethodDecl, n.Methods) |
| 583 | |
| 584 | case *ast.DeclStmt: |
| 585 | // local declarations should only be *ast.GenDecls; |
| 586 | // ignore incorrect ASTs |
| 587 | if decl, ok := n.Decl.(*ast.GenDecl); ok { |
| 588 | x.decl = nil // no snippets for local declarations |
| 589 | x.visitGenDecl(decl) |
| 590 | } |
| 591 | |
| 592 | case *ast.GenDecl: |
| 593 | x.decl = n |
| 594 | x.visitGenDecl(n) |
| 595 | |
| 596 | case *ast.FuncDecl: |
| 597 | kind := FuncDecl |
| 598 | if n.Recv != nil { |
| 599 | kind = MethodDecl |
| 600 | ast.Walk(x, n.Recv) |
| 601 | } |
| 602 | x.decl = n |
| 603 | x.visitIdent(kind, n.Name) |
| 604 | ast.Walk(x, n.Type) |
| 605 | if n.Body != nil { |
| 606 | ast.Walk(x, n.Body) |
| 607 | } |
| 608 | |
| 609 | case *ast.File: |
| 610 | x.decl = nil |
| 611 | x.visitIdent(PackageClause, n.Name) |
| 612 | for _, d := range n.Decls { |
| 613 | ast.Walk(x, d) |
| 614 | } |
| 615 | |
| 616 | default: |
| 617 | return x |
| 618 | } |
| 619 | |
| 620 | return nil |
| 621 | } |
| 622 | |
| 623 | // addFile adds a file to the index if possible and returns the file set file |
| 624 | // and the file's AST if it was successfully parsed as a Go file. If addFile |
| 625 | // failed (that is, if the file was not added), it returns file == nil. |
| 626 | func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) { |
| 627 | defer f.Close() |
| 628 | |
| 629 | // The file set's base offset and x.sources size must be in lock-step; |
| 630 | // this permits the direct mapping of suffix array lookup results to |
| 631 | // to corresponding Pos values. |
| 632 | // |
| 633 | // When a file is added to the file set, its offset base increases by |
| 634 | // the size of the file + 1; and the initial base offset is 1. Add an |
| 635 | // extra byte to the sources here. |
| 636 | x.sources.WriteByte(0) |
| 637 | |
| 638 | // If the sources length doesn't match the file set base at this point |
| 639 | // the file set implementation changed or we have another error. |
| 640 | base := x.fset.Base() |
| 641 | if x.sources.Len() != base { |
| 642 | panic("internal error: file base incorrect") |
| 643 | } |
| 644 | |
| 645 | // append file contents (src) to x.sources |
| 646 | if _, err := x.sources.ReadFrom(f); err == nil { |
| 647 | src := x.sources.Bytes()[base:] |
| 648 | |
| 649 | if goFile { |
| 650 | // parse the file and in the process add it to the file set |
| 651 | if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil { |
| 652 | file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file |
| 653 | return |
| 654 | } |
| 655 | // file has parse errors, and the AST may be incorrect - |
| 656 | // set lines information explicitly and index as ordinary |
| 657 | // text file (cannot fall through to the text case below |
| 658 | // because the file has already been added to the file set |
| 659 | // by the parser) |
| 660 | file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file |
| 661 | file.SetLinesForContent(src) |
| 662 | ast = nil |
| 663 | return |
| 664 | } |
| 665 | |
| 666 | if util.IsText(src) { |
| 667 | // only add the file to the file set (for the full text index) |
| 668 | file = x.fset.AddFile(filename, x.fset.Base(), len(src)) |
| 669 | file.SetLinesForContent(src) |
| 670 | return |
| 671 | } |
| 672 | } |
| 673 | |
| 674 | // discard possibly added data |
| 675 | x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added |
| 676 | return |
| 677 | } |
| 678 | |
| 679 | // Design note: Using an explicit white list of permitted files for indexing |
| 680 | // makes sure that the important files are included and massively reduces the |
| 681 | // number of files to index. The advantage over a blacklist is that unexpected |
| 682 | // (non-blacklisted) files won't suddenly explode the index. |
| 683 | |
| 684 | // Files are whitelisted if they have a file name or extension |
| 685 | // present as key in whitelisted. |
| 686 | var whitelisted = map[string]bool{ |
| 687 | ".bash": true, |
| 688 | ".c": true, |
| 689 | ".cc": true, |
| 690 | ".cpp": true, |
| 691 | ".cxx": true, |
| 692 | ".css": true, |
| 693 | ".go": true, |
| 694 | ".goc": true, |
| 695 | ".h": true, |
| 696 | ".hh": true, |
| 697 | ".hpp": true, |
| 698 | ".hxx": true, |
| 699 | ".html": true, |
| 700 | ".js": true, |
| 701 | ".out": true, |
| 702 | ".py": true, |
| 703 | ".s": true, |
| 704 | ".sh": true, |
| 705 | ".txt": true, |
| 706 | ".xml": true, |
| 707 | "AUTHORS": true, |
| 708 | "CONTRIBUTORS": true, |
| 709 | "LICENSE": true, |
| 710 | "Makefile": true, |
| 711 | "PATENTS": true, |
| 712 | "README": true, |
| 713 | } |
| 714 | |
| 715 | // isWhitelisted returns true if a file is on the list |
| 716 | // of "permitted" files for indexing. The filename must |
| 717 | // be the directory-local name of the file. |
| 718 | func isWhitelisted(filename string) bool { |
| 719 | key := pathpkg.Ext(filename) |
| 720 | if key == "" { |
| 721 | // file has no extension - use entire filename |
| 722 | key = filename |
| 723 | } |
| 724 | return whitelisted[key] |
| 725 | } |
| 726 | |
| 727 | func (x *Indexer) indexDocs(dirname string, filename string, astFile *ast.File) { |
| 728 | pkgName := x.intern(astFile.Name.Name) |
| 729 | if pkgName == "main" { |
| 730 | return |
| 731 | } |
| 732 | pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/")) |
| 733 | astPkg := ast.Package{ |
| 734 | Name: pkgName, |
| 735 | Files: map[string]*ast.File{ |
| 736 | filename: astFile, |
| 737 | }, |
| 738 | } |
| 739 | var m doc.Mode |
| 740 | docPkg := doc.New(&astPkg, dirname, m) |
| 741 | addIdent := func(sk SpotKind, name string, docstr string) { |
| 742 | if x.idents[sk] == nil { |
| 743 | x.idents[sk] = make(map[string][]Ident) |
| 744 | } |
| 745 | name = x.intern(name) |
| 746 | x.idents[sk][name] = append(x.idents[sk][name], Ident{ |
| 747 | Path: pkgPath, |
| 748 | Package: pkgName, |
| 749 | Name: name, |
| 750 | Doc: doc.Synopsis(docstr), |
| 751 | }) |
| 752 | } |
| 753 | |
| 754 | if x.idents[PackageClause] == nil { |
| 755 | x.idents[PackageClause] = make(map[string][]Ident) |
| 756 | } |
| 757 | // List of words under which the package identifier will be stored. |
| 758 | // This includes the package name and the components of the directory |
| 759 | // in which it resides. |
| 760 | words := strings.Split(pathpkg.Dir(pkgPath), "/") |
| 761 | if words[0] == "." { |
| 762 | words = []string{} |
| 763 | } |
| 764 | name := x.intern(docPkg.Name) |
| 765 | synopsis := doc.Synopsis(docPkg.Doc) |
| 766 | words = append(words, name) |
| 767 | pkgIdent := Ident{ |
| 768 | Path: pkgPath, |
| 769 | Package: pkgName, |
| 770 | Name: name, |
| 771 | Doc: synopsis, |
| 772 | } |
| 773 | for _, word := range words { |
| 774 | word = x.intern(word) |
| 775 | found := false |
| 776 | pkgs := x.idents[PackageClause][word] |
| 777 | for i, p := range pkgs { |
| 778 | if p.Path == pkgPath { |
| 779 | if docPkg.Doc != "" { |
| 780 | p.Doc = synopsis |
| 781 | pkgs[i] = p |
| 782 | } |
| 783 | found = true |
| 784 | break |
| 785 | } |
| 786 | } |
| 787 | if !found { |
| 788 | x.idents[PackageClause][word] = append(x.idents[PackageClause][word], pkgIdent) |
| 789 | } |
| 790 | } |
| 791 | |
| 792 | for _, c := range docPkg.Consts { |
| 793 | for _, name := range c.Names { |
| 794 | addIdent(ConstDecl, name, c.Doc) |
| 795 | } |
| 796 | } |
| 797 | for _, t := range docPkg.Types { |
| 798 | addIdent(TypeDecl, t.Name, t.Doc) |
| 799 | for _, c := range t.Consts { |
| 800 | for _, name := range c.Names { |
| 801 | addIdent(ConstDecl, name, c.Doc) |
| 802 | } |
| 803 | } |
| 804 | for _, v := range t.Vars { |
| 805 | for _, name := range v.Names { |
| 806 | addIdent(VarDecl, name, v.Doc) |
| 807 | } |
| 808 | } |
| 809 | for _, f := range t.Funcs { |
| 810 | addIdent(FuncDecl, f.Name, f.Doc) |
| 811 | } |
| 812 | for _, f := range t.Methods { |
| 813 | addIdent(MethodDecl, f.Name, f.Doc) |
| 814 | // Change the name of methods to be "<typename>.<methodname>". |
| 815 | // They will still be indexed as <methodname>. |
| 816 | idents := x.idents[MethodDecl][f.Name] |
| 817 | idents[len(idents)-1].Name = x.intern(t.Name + "." + f.Name) |
| 818 | } |
| 819 | } |
| 820 | for _, v := range docPkg.Vars { |
| 821 | for _, name := range v.Names { |
| 822 | addIdent(VarDecl, name, v.Doc) |
| 823 | } |
| 824 | } |
| 825 | for _, f := range docPkg.Funcs { |
| 826 | addIdent(FuncDecl, f.Name, f.Doc) |
| 827 | } |
| 828 | } |
| 829 | |
| 830 | func (x *Indexer) indexGoFile(dirname string, filename string, file *token.File, astFile *ast.File) { |
| 831 | pkgName := astFile.Name.Name |
| 832 | |
| 833 | if x.c.IndexGoCode { |
| 834 | x.current = file |
| 835 | pak := x.lookupPackage(dirname, pkgName) |
| 836 | x.file = &File{filename, pak} |
| 837 | ast.Walk(x, astFile) |
| 838 | } |
| 839 | |
| 840 | if x.c.IndexDocs { |
| 841 | // Test files are already filtered out in visitFile if IndexGoCode and |
| 842 | // IndexFullText are false. Otherwise, check here. |
| 843 | isTestFile := (x.c.IndexGoCode || x.c.IndexFullText) && |
| 844 | (strings.HasSuffix(filename, "_test.go") || strings.HasPrefix(dirname, "/test/")) |
| 845 | if !isTestFile { |
| 846 | x.indexDocs(dirname, filename, astFile) |
| 847 | } |
| 848 | } |
| 849 | |
| 850 | ppKey := x.intern(pkgName) |
| 851 | if _, ok := x.packagePath[ppKey]; !ok { |
| 852 | x.packagePath[ppKey] = make(map[string]bool) |
| 853 | } |
| 854 | pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/")) |
| 855 | x.packagePath[ppKey][pkgPath] = true |
| 856 | |
| 857 | // Merge in exported symbols found walking this file into |
| 858 | // the map for that package. |
| 859 | if len(x.curPkgExports) > 0 { |
| 860 | dest, ok := x.exports[pkgPath] |
| 861 | if !ok { |
| 862 | dest = make(map[string]SpotKind) |
| 863 | x.exports[pkgPath] = dest |
| 864 | } |
| 865 | for k, v := range x.curPkgExports { |
| 866 | dest[k] = v |
| 867 | } |
| 868 | } |
| 869 | } |
| 870 | |
| 871 | func (x *Indexer) visitFile(dirname string, fi os.FileInfo) { |
| 872 | if fi.IsDir() || !x.c.IndexEnabled { |
| 873 | return |
| 874 | } |
| 875 | |
| 876 | filename := pathpkg.Join(dirname, fi.Name()) |
| 877 | goFile := isGoFile(fi) |
| 878 | |
| 879 | switch { |
| 880 | case x.c.IndexFullText: |
| 881 | if !isWhitelisted(fi.Name()) { |
| 882 | return |
| 883 | } |
| 884 | case x.c.IndexGoCode: |
| 885 | if !goFile { |
| 886 | return |
| 887 | } |
| 888 | case x.c.IndexDocs: |
| 889 | if !goFile || |
| 890 | strings.HasSuffix(fi.Name(), "_test.go") || |
| 891 | strings.HasPrefix(dirname, "/test/") { |
| 892 | return |
| 893 | } |
| 894 | default: |
| 895 | // No indexing turned on. |
| 896 | return |
| 897 | } |
| 898 | |
| 899 | x.fsOpenGate <- true |
| 900 | defer func() { <-x.fsOpenGate }() |
| 901 | |
| 902 | // open file |
| 903 | f, err := x.c.fs.Open(filename) |
| 904 | if err != nil { |
| 905 | return |
| 906 | } |
| 907 | |
| 908 | x.mu.Lock() |
| 909 | defer x.mu.Unlock() |
| 910 | |
| 911 | x.throttle.Throttle() |
| 912 | |
| 913 | x.curPkgExports = make(map[string]SpotKind) |
| 914 | file, fast := x.addFile(f, filename, goFile) |
| 915 | if file == nil { |
| 916 | return // addFile failed |
| 917 | } |
| 918 | |
| 919 | if fast != nil { |
| 920 | x.indexGoFile(dirname, fi.Name(), file, fast) |
| 921 | } |
| 922 | |
| 923 | // update statistics |
| 924 | x.stats.Bytes += file.Size() |
| 925 | x.stats.Files++ |
| 926 | x.stats.Lines += file.LineCount() |
| 927 | } |
| 928 | |
| 929 | // indexOptions contains information that affects the contents of an index. |
| 930 | type indexOptions struct { |
| 931 | // Docs provides documentation search results. |
| 932 | // It is only consulted if IndexEnabled is true. |
| 933 | // The default values is true. |
| 934 | Docs bool |
| 935 | |
| 936 | // GoCode provides Go source code search results. |
| 937 | // It is only consulted if IndexEnabled is true. |
| 938 | // The default values is true. |
| 939 | GoCode bool |
| 940 | |
| 941 | // FullText provides search results from all files. |
| 942 | // It is only consulted if IndexEnabled is true. |
| 943 | // The default values is true. |
| 944 | FullText bool |
| 945 | |
| 946 | // MaxResults optionally specifies the maximum results for indexing. |
| 947 | // The default is 1000. |
| 948 | MaxResults int |
| 949 | } |
| 950 | |
| 951 | // ---------------------------------------------------------------------------- |
| 952 | // Index |
| 953 | |
| 954 | type LookupResult struct { |
| 955 | Decls HitList // package-level declarations (with snippets) |
| 956 | Others HitList // all other occurrences |
| 957 | } |
| 958 | |
| 959 | type Index struct { |
| 960 | fset *token.FileSet // file set used during indexing; nil if no textindex |
| 961 | suffixes *suffixarray.Index // suffixes for concatenated sources; nil if no textindex |
| 962 | words map[string]*LookupResult // maps words to hit lists |
| 963 | alts map[string]*AltWords // maps canonical(words) to lists of alternative spellings |
| 964 | snippets []*Snippet // all snippets, indexed by snippet index |
| 965 | stats Statistics |
| 966 | importCount map[string]int // package path ("net/http") => count |
| 967 | packagePath map[string]map[string]bool // "template" => "text/template" => true |
| 968 | exports map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl |
| 969 | idents map[SpotKind]map[string][]Ident |
| 970 | opts indexOptions |
| 971 | } |
| 972 | |
| 973 | func canonical(w string) string { return strings.ToLower(w) } |
| 974 | |
| 975 | // Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems |
| 976 | // consuming file descriptors, where some systems have low 256 or 512 limits. |
| 977 | // Go should have a built-in way to cap fd usage under the ulimit. |
| 978 | const ( |
| 979 | maxOpenFiles = 200 |
| 980 | maxOpenDirs = 50 |
| 981 | ) |
| 982 | |
| 983 | func (c *Corpus) throttle() float64 { |
| 984 | if c.IndexThrottle <= 0 { |
| 985 | return 0.9 |
| 986 | } |
| 987 | if c.IndexThrottle > 1.0 { |
| 988 | return 1.0 |
| 989 | } |
| 990 | return c.IndexThrottle |
| 991 | } |
| 992 | |
| 993 | // NewIndex creates a new index for the .go files provided by the corpus. |
| 994 | func (c *Corpus) NewIndex() *Index { |
| 995 | // initialize Indexer |
| 996 | // (use some reasonably sized maps to start) |
| 997 | x := &Indexer{ |
| 998 | c: c, |
| 999 | fset: token.NewFileSet(), |
| 1000 | fsOpenGate: make(chan bool, maxOpenFiles), |
| 1001 | strings: make(map[string]string), |
| 1002 | packages: make(map[Pak]*Pak, 256), |
| 1003 | words: make(map[string]*IndexResult, 8192), |
| 1004 | throttle: util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time |
| 1005 | importCount: make(map[string]int), |
| 1006 | packagePath: make(map[string]map[string]bool), |
| 1007 | exports: make(map[string]map[string]SpotKind), |
| 1008 | idents: make(map[SpotKind]map[string][]Ident, 4), |
| 1009 | } |
| 1010 | |
| 1011 | // index all files in the directories given by dirnames |
| 1012 | var wg sync.WaitGroup // outstanding ReadDir + visitFile |
| 1013 | dirGate := make(chan bool, maxOpenDirs) |
| 1014 | for dirname := range c.fsDirnames() { |
| 1015 | if c.IndexDirectory != nil && !c.IndexDirectory(dirname) { |
| 1016 | continue |
| 1017 | } |
| 1018 | dirGate <- true |
| 1019 | wg.Add(1) |
| 1020 | go func(dirname string) { |
| 1021 | defer func() { <-dirGate }() |
| 1022 | defer wg.Done() |
| 1023 | |
| 1024 | list, err := c.fs.ReadDir(dirname) |
| 1025 | if err != nil { |
| 1026 | log.Printf("ReadDir(%q): %v; skipping directory", dirname, err) |
| 1027 | return // ignore this directory |
| 1028 | } |
| 1029 | for _, fi := range list { |
| 1030 | wg.Add(1) |
| 1031 | go func(fi os.FileInfo) { |
| 1032 | defer wg.Done() |
| 1033 | x.visitFile(dirname, fi) |
| 1034 | }(fi) |
| 1035 | } |
| 1036 | }(dirname) |
| 1037 | } |
| 1038 | wg.Wait() |
| 1039 | |
| 1040 | if !c.IndexFullText { |
| 1041 | // the file set, the current file, and the sources are |
| 1042 | // not needed after indexing if no text index is built - |
| 1043 | // help GC and clear them |
| 1044 | x.fset = nil |
| 1045 | x.sources.Reset() |
| 1046 | x.current = nil // contains reference to fset! |
| 1047 | } |
| 1048 | |
| 1049 | // for each word, reduce the RunLists into a LookupResult; |
| 1050 | // also collect the word with its canonical spelling in a |
| 1051 | // word list for later computation of alternative spellings |
| 1052 | words := make(map[string]*LookupResult) |
| 1053 | var wlist RunList |
| 1054 | for w, h := range x.words { |
| 1055 | decls := reduce(h.Decls) |
| 1056 | others := reduce(h.Others) |
| 1057 | words[w] = &LookupResult{ |
| 1058 | Decls: decls, |
| 1059 | Others: others, |
| 1060 | } |
| 1061 | wlist = append(wlist, &wordPair{canonical(w), w}) |
| 1062 | x.throttle.Throttle() |
| 1063 | } |
| 1064 | x.stats.Words = len(words) |
| 1065 | |
| 1066 | // reduce the word list {canonical(w), w} into |
| 1067 | // a list of AltWords runs {canonical(w), {w}} |
| 1068 | alist := wlist.reduce(lessWordPair, newAltWords) |
| 1069 | |
| 1070 | // convert alist into a map of alternative spellings |
| 1071 | alts := make(map[string]*AltWords) |
| 1072 | for i := 0; i < len(alist); i++ { |
| 1073 | a := alist[i].(*AltWords) |
| 1074 | alts[a.Canon] = a |
| 1075 | } |
| 1076 | |
| 1077 | // create text index |
| 1078 | var suffixes *suffixarray.Index |
| 1079 | if c.IndexFullText { |
| 1080 | suffixes = suffixarray.New(x.sources.Bytes()) |
| 1081 | } |
| 1082 | |
| 1083 | // sort idents by the number of imports of their respective packages |
| 1084 | for _, idMap := range x.idents { |
| 1085 | for _, ir := range idMap { |
| 1086 | sort.Sort(byImportCount{ir, x.importCount}) |
| 1087 | } |
| 1088 | } |
| 1089 | |
| 1090 | return &Index{ |
| 1091 | fset: x.fset, |
| 1092 | suffixes: suffixes, |
| 1093 | words: words, |
| 1094 | alts: alts, |
| 1095 | snippets: x.snippets, |
| 1096 | stats: x.stats, |
| 1097 | importCount: x.importCount, |
| 1098 | packagePath: x.packagePath, |
| 1099 | exports: x.exports, |
| 1100 | idents: x.idents, |
| 1101 | opts: indexOptions{ |
| 1102 | Docs: x.c.IndexDocs, |
| 1103 | GoCode: x.c.IndexGoCode, |
| 1104 | FullText: x.c.IndexFullText, |
| 1105 | MaxResults: x.c.MaxResults, |
| 1106 | }, |
| 1107 | } |
| 1108 | } |
| 1109 | |
| 1110 | var ErrFileIndexVersion = errors.New("file index version out of date") |
| 1111 | |
| 1112 | const fileIndexVersion = 3 |
| 1113 | |
| 1114 | // fileIndex is the subset of Index that's gob-encoded for use by |
| 1115 | // Index.Write and Index.Read. |
| 1116 | type fileIndex struct { |
| 1117 | Version int |
| 1118 | Words map[string]*LookupResult |
| 1119 | Alts map[string]*AltWords |
| 1120 | Snippets []*Snippet |
| 1121 | Fulltext bool |
| 1122 | Stats Statistics |
| 1123 | ImportCount map[string]int |
| 1124 | PackagePath map[string]map[string]bool |
| 1125 | Exports map[string]map[string]SpotKind |
| 1126 | Idents map[SpotKind]map[string][]Ident |
| 1127 | Opts indexOptions |
| 1128 | } |
| 1129 | |
| 1130 | func (x *fileIndex) Write(w io.Writer) error { |
| 1131 | return gob.NewEncoder(w).Encode(x) |
| 1132 | } |
| 1133 | |
| 1134 | func (x *fileIndex) Read(r io.Reader) error { |
| 1135 | return gob.NewDecoder(r).Decode(x) |
| 1136 | } |
| 1137 | |
| 1138 | // WriteTo writes the index x to w. |
| 1139 | func (x *Index) WriteTo(w io.Writer) (n int64, err error) { |
| 1140 | w = countingWriter{&n, w} |
| 1141 | fulltext := false |
| 1142 | if x.suffixes != nil { |
| 1143 | fulltext = true |
| 1144 | } |
| 1145 | fx := fileIndex{ |
| 1146 | Version: fileIndexVersion, |
| 1147 | Words: x.words, |
| 1148 | Alts: x.alts, |
| 1149 | Snippets: x.snippets, |
| 1150 | Fulltext: fulltext, |
| 1151 | Stats: x.stats, |
| 1152 | ImportCount: x.importCount, |
| 1153 | PackagePath: x.packagePath, |
| 1154 | Exports: x.exports, |
| 1155 | Idents: x.idents, |
| 1156 | Opts: x.opts, |
| 1157 | } |
| 1158 | if err := fx.Write(w); err != nil { |
| 1159 | return 0, err |
| 1160 | } |
| 1161 | if fulltext { |
| 1162 | encode := func(x interface{}) error { |
| 1163 | return gob.NewEncoder(w).Encode(x) |
| 1164 | } |
| 1165 | if err := x.fset.Write(encode); err != nil { |
| 1166 | return 0, err |
| 1167 | } |
| 1168 | if err := x.suffixes.Write(w); err != nil { |
| 1169 | return 0, err |
| 1170 | } |
| 1171 | } |
| 1172 | return n, nil |
| 1173 | } |
| 1174 | |
| 1175 | // ReadFrom reads the index from r into x; x must not be nil. |
| 1176 | // If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader. |
| 1177 | // If the index is from an old version, the error is ErrFileIndexVersion. |
| 1178 | func (x *Index) ReadFrom(r io.Reader) (n int64, err error) { |
| 1179 | // We use the ability to read bytes as a plausible surrogate for buffering. |
| 1180 | if _, ok := r.(io.ByteReader); !ok { |
| 1181 | r = bufio.NewReader(r) |
| 1182 | } |
| 1183 | r = countingReader{&n, r.(byteReader)} |
| 1184 | var fx fileIndex |
| 1185 | if err := fx.Read(r); err != nil { |
| 1186 | return n, err |
| 1187 | } |
| 1188 | if fx.Version != fileIndexVersion { |
| 1189 | return 0, ErrFileIndexVersion |
| 1190 | } |
| 1191 | x.words = fx.Words |
| 1192 | x.alts = fx.Alts |
| 1193 | x.snippets = fx.Snippets |
| 1194 | x.stats = fx.Stats |
| 1195 | x.importCount = fx.ImportCount |
| 1196 | x.packagePath = fx.PackagePath |
| 1197 | x.exports = fx.Exports |
| 1198 | x.idents = fx.Idents |
| 1199 | x.opts = fx.Opts |
| 1200 | if fx.Fulltext { |
| 1201 | x.fset = token.NewFileSet() |
| 1202 | decode := func(x interface{}) error { |
| 1203 | return gob.NewDecoder(r).Decode(x) |
| 1204 | } |
| 1205 | if err := x.fset.Read(decode); err != nil { |
| 1206 | return n, err |
| 1207 | } |
| 1208 | x.suffixes = new(suffixarray.Index) |
| 1209 | if err := x.suffixes.Read(r); err != nil { |
| 1210 | return n, err |
| 1211 | } |
| 1212 | } |
| 1213 | return n, nil |
| 1214 | } |
| 1215 | |
| 1216 | // Stats returns index statistics. |
| 1217 | func (x *Index) Stats() Statistics { |
| 1218 | return x.stats |
| 1219 | } |
| 1220 | |
| 1221 | // ImportCount returns a map from import paths to how many times they were seen. |
| 1222 | func (x *Index) ImportCount() map[string]int { |
| 1223 | return x.importCount |
| 1224 | } |
| 1225 | |
| 1226 | // PackagePath returns a map from short package name to a set |
| 1227 | // of full package path names that use that short package name. |
| 1228 | func (x *Index) PackagePath() map[string]map[string]bool { |
| 1229 | return x.packagePath |
| 1230 | } |
| 1231 | |
| 1232 | // Exports returns a map from full package path to exported |
| 1233 | // symbol name to its type. |
| 1234 | func (x *Index) Exports() map[string]map[string]SpotKind { |
| 1235 | return x.exports |
| 1236 | } |
| 1237 | |
| 1238 | // Idents returns a map from identifier type to exported |
| 1239 | // symbol name to the list of identifiers matching that name. |
| 1240 | func (x *Index) Idents() map[SpotKind]map[string][]Ident { |
| 1241 | return x.idents |
| 1242 | } |
| 1243 | |
| 1244 | func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) { |
| 1245 | match = x.words[w] |
| 1246 | alt = x.alts[canonical(w)] |
| 1247 | // remove current spelling from alternatives |
| 1248 | // (if there is no match, the alternatives do |
| 1249 | // not contain the current spelling) |
| 1250 | if match != nil && alt != nil { |
| 1251 | alt = alt.filter(w) |
| 1252 | } |
| 1253 | return |
| 1254 | } |
| 1255 | |
| 1256 | // isIdentifier reports whether s is a Go identifier. |
| 1257 | func isIdentifier(s string) bool { |
| 1258 | for i, ch := range s { |
| 1259 | if unicode.IsLetter(ch) || ch == '_' || i > 0 && unicode.IsDigit(ch) { |
| 1260 | continue |
| 1261 | } |
| 1262 | return false |
| 1263 | } |
| 1264 | return len(s) > 0 |
| 1265 | } |
| 1266 | |
| 1267 | // For a given query, which is either a single identifier or a qualified |
| 1268 | // identifier, Lookup returns a SearchResult containing packages, a LookupResult, a |
| 1269 | // list of alternative spellings, and identifiers, if any. Any and all results |
| 1270 | // may be nil. If the query syntax is wrong, an error is reported. |
| 1271 | func (x *Index) Lookup(query string) (*SearchResult, error) { |
| 1272 | ss := strings.Split(query, ".") |
| 1273 | |
| 1274 | // check query syntax |
| 1275 | for _, s := range ss { |
| 1276 | if !isIdentifier(s) { |
| 1277 | return nil, errors.New("all query parts must be identifiers") |
| 1278 | } |
| 1279 | } |
| 1280 | rslt := &SearchResult{ |
| 1281 | Query: query, |
| 1282 | Idents: make(map[SpotKind][]Ident, 5), |
| 1283 | } |
| 1284 | // handle simple and qualified identifiers |
| 1285 | switch len(ss) { |
| 1286 | case 1: |
| 1287 | ident := ss[0] |
| 1288 | rslt.Hit, rslt.Alt = x.lookupWord(ident) |
| 1289 | if rslt.Hit != nil { |
| 1290 | // found a match - filter packages with same name |
| 1291 | // for the list of packages called ident, if any |
| 1292 | rslt.Pak = rslt.Hit.Others.filter(ident) |
| 1293 | } |
| 1294 | for k, v := range x.idents { |
| 1295 | const rsltLimit = 50 |
| 1296 | ids := byImportCount{v[ident], x.importCount} |
| 1297 | rslt.Idents[k] = ids.top(rsltLimit) |
| 1298 | } |
| 1299 | |
| 1300 | case 2: |
| 1301 | pakname, ident := ss[0], ss[1] |
| 1302 | rslt.Hit, rslt.Alt = x.lookupWord(ident) |
| 1303 | if rslt.Hit != nil { |
| 1304 | // found a match - filter by package name |
| 1305 | // (no paks - package names are not qualified) |
| 1306 | decls := rslt.Hit.Decls.filter(pakname) |
| 1307 | others := rslt.Hit.Others.filter(pakname) |
| 1308 | rslt.Hit = &LookupResult{decls, others} |
| 1309 | } |
| 1310 | for k, v := range x.idents { |
| 1311 | ids := byImportCount{v[ident], x.importCount} |
| 1312 | rslt.Idents[k] = ids.filter(pakname) |
| 1313 | } |
| 1314 | |
| 1315 | default: |
| 1316 | return nil, errors.New("query is not a (qualified) identifier") |
| 1317 | } |
| 1318 | |
| 1319 | return rslt, nil |
| 1320 | } |
| 1321 | |
| 1322 | func (x *Index) Snippet(i int) *Snippet { |
| 1323 | // handle illegal snippet indices gracefully |
| 1324 | if 0 <= i && i < len(x.snippets) { |
| 1325 | return x.snippets[i] |
| 1326 | } |
| 1327 | return nil |
| 1328 | } |
| 1329 | |
| 1330 | type positionList []struct { |
| 1331 | filename string |
| 1332 | line int |
| 1333 | } |
| 1334 | |
| 1335 | func (list positionList) Len() int { return len(list) } |
| 1336 | func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename } |
| 1337 | func (list positionList) Swap(i, j int) { list[i], list[j] = list[j], list[i] } |
| 1338 | |
| 1339 | // unique returns the list sorted and with duplicate entries removed |
| 1340 | func unique(list []int) []int { |
| 1341 | sort.Ints(list) |
| 1342 | var last int |
| 1343 | i := 0 |
| 1344 | for _, x := range list { |
| 1345 | if i == 0 || x != last { |
| 1346 | last = x |
| 1347 | list[i] = x |
| 1348 | i++ |
| 1349 | } |
| 1350 | } |
| 1351 | return list[0:i] |
| 1352 | } |
| 1353 | |
| 1354 | // A FileLines value specifies a file and line numbers within that file. |
| 1355 | type FileLines struct { |
| 1356 | Filename string |
| 1357 | Lines []int |
| 1358 | } |
| 1359 | |
| 1360 | // LookupRegexp returns the number of matches and the matches where a regular |
| 1361 | // expression r is found in the full text index. At most n matches are |
| 1362 | // returned (thus found <= n). |
| 1363 | func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) { |
| 1364 | if x.suffixes == nil || n <= 0 { |
| 1365 | return |
| 1366 | } |
| 1367 | // n > 0 |
| 1368 | |
| 1369 | var list positionList |
| 1370 | // FindAllIndex may returns matches that span across file boundaries. |
| 1371 | // Such matches are unlikely, buf after eliminating them we may end up |
| 1372 | // with fewer than n matches. If we don't have enough at the end, redo |
| 1373 | // the search with an increased value n1, but only if FindAllIndex |
| 1374 | // returned all the requested matches in the first place (if it |
| 1375 | // returned fewer than that there cannot be more). |
| 1376 | for n1 := n; found < n; n1 += n - found { |
| 1377 | found = 0 |
| 1378 | matches := x.suffixes.FindAllIndex(r, n1) |
| 1379 | // compute files, exclude matches that span file boundaries, |
| 1380 | // and map offsets to file-local offsets |
| 1381 | list = make(positionList, len(matches)) |
| 1382 | for _, m := range matches { |
| 1383 | // by construction, an offset corresponds to the Pos value |
| 1384 | // for the file set - use it to get the file and line |
| 1385 | p := token.Pos(m[0]) |
| 1386 | if file := x.fset.File(p); file != nil { |
| 1387 | if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() { |
| 1388 | // match [m[0], m[1]) is within the file boundaries |
| 1389 | list[found].filename = file.Name() |
| 1390 | list[found].line = file.Line(p) |
| 1391 | found++ |
| 1392 | } |
| 1393 | } |
| 1394 | } |
| 1395 | if found == n || len(matches) < n1 { |
| 1396 | // found all matches or there's no chance to find more |
| 1397 | break |
| 1398 | } |
| 1399 | } |
| 1400 | list = list[0:found] |
| 1401 | sort.Sort(list) // sort by filename |
| 1402 | |
| 1403 | // collect matches belonging to the same file |
| 1404 | var last string |
| 1405 | var lines []int |
| 1406 | addLines := func() { |
| 1407 | if len(lines) > 0 { |
| 1408 | // remove duplicate lines |
| 1409 | result = append(result, FileLines{last, unique(lines)}) |
| 1410 | lines = nil |
| 1411 | } |
| 1412 | } |
| 1413 | for _, m := range list { |
| 1414 | if m.filename != last { |
| 1415 | addLines() |
| 1416 | last = m.filename |
| 1417 | } |
| 1418 | lines = append(lines, m.line) |
| 1419 | } |
| 1420 | addLines() |
| 1421 | |
| 1422 | return |
| 1423 | } |
| 1424 | |
| 1425 | // InvalidateIndex should be called whenever any of the file systems |
| 1426 | // under godoc's observation change so that the indexer is kicked on. |
| 1427 | func (c *Corpus) invalidateIndex() { |
| 1428 | c.fsModified.Set(nil) |
| 1429 | c.refreshMetadata() |
| 1430 | } |
| 1431 | |
| 1432 | // feedDirnames feeds the directory names of all directories |
| 1433 | // under the file system given by root to channel c. |
| 1434 | func (c *Corpus) feedDirnames(ch chan<- string) { |
| 1435 | if dir, _ := c.fsTree.Get(); dir != nil { |
| 1436 | for d := range dir.(*Directory).iter(false) { |
| 1437 | ch <- d.Path |
| 1438 | } |
| 1439 | } |
| 1440 | } |
| 1441 | |
| 1442 | // fsDirnames() returns a channel sending all directory names |
| 1443 | // of all the file systems under godoc's observation. |
| 1444 | func (c *Corpus) fsDirnames() <-chan string { |
| 1445 | ch := make(chan string, 256) // buffered for fewer context switches |
| 1446 | go func() { |
| 1447 | c.feedDirnames(ch) |
| 1448 | close(ch) |
| 1449 | }() |
| 1450 | return ch |
| 1451 | } |
| 1452 | |
| 1453 | // CompatibleWith reports whether the Index x is compatible with the corpus |
| 1454 | // indexing options set in c. |
| 1455 | func (x *Index) CompatibleWith(c *Corpus) bool { |
| 1456 | return x.opts.Docs == c.IndexDocs && |
| 1457 | x.opts.GoCode == c.IndexGoCode && |
| 1458 | x.opts.FullText == c.IndexFullText && |
| 1459 | x.opts.MaxResults == c.MaxResults |
| 1460 | } |
| 1461 | |
| 1462 | func (c *Corpus) readIndex(filenames string) error { |
| 1463 | matches, err := filepath.Glob(filenames) |
| 1464 | if err != nil { |
| 1465 | return err |
| 1466 | } else if matches == nil { |
| 1467 | return fmt.Errorf("no index files match %q", filenames) |
| 1468 | } |
| 1469 | sort.Strings(matches) // make sure files are in the right order |
| 1470 | files := make([]io.Reader, 0, len(matches)) |
| 1471 | for _, filename := range matches { |
| 1472 | f, err := os.Open(filename) |
| 1473 | if err != nil { |
| 1474 | return err |
| 1475 | } |
| 1476 | defer f.Close() |
| 1477 | files = append(files, f) |
| 1478 | } |
| 1479 | return c.ReadIndexFrom(io.MultiReader(files...)) |
| 1480 | } |
| 1481 | |
| 1482 | // ReadIndexFrom sets the current index from the serialized version found in r. |
| 1483 | func (c *Corpus) ReadIndexFrom(r io.Reader) error { |
| 1484 | x := new(Index) |
| 1485 | if _, err := x.ReadFrom(r); err != nil { |
| 1486 | return err |
| 1487 | } |
| 1488 | if !x.CompatibleWith(c) { |
| 1489 | return fmt.Errorf("index file options are incompatible: %v", x.opts) |
| 1490 | } |
| 1491 | c.searchIndex.Set(x) |
| 1492 | return nil |
| 1493 | } |
| 1494 | |
| 1495 | func (c *Corpus) UpdateIndex() { |
| 1496 | if c.Verbose { |
| 1497 | log.Printf("updating index...") |
| 1498 | } |
| 1499 | start := time.Now() |
| 1500 | index := c.NewIndex() |
| 1501 | stop := time.Now() |
| 1502 | c.searchIndex.Set(index) |
| 1503 | if c.Verbose { |
| 1504 | secs := stop.Sub(start).Seconds() |
| 1505 | stats := index.Stats() |
| 1506 | log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)", |
| 1507 | secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots) |
| 1508 | } |
| 1509 | memstats := new(runtime.MemStats) |
| 1510 | runtime.ReadMemStats(memstats) |
| 1511 | if c.Verbose { |
| 1512 | log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) |
| 1513 | } |
| 1514 | runtime.GC() |
| 1515 | runtime.ReadMemStats(memstats) |
| 1516 | if c.Verbose { |
| 1517 | log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) |
| 1518 | } |
| 1519 | } |
| 1520 | |
| 1521 | // RunIndexer runs forever, indexing. |
| 1522 | func (c *Corpus) RunIndexer() { |
| 1523 | // initialize the index from disk if possible |
| 1524 | if c.IndexFiles != "" { |
| 1525 | c.initFSTree() |
| 1526 | if err := c.readIndex(c.IndexFiles); err != nil { |
| 1527 | log.Printf("error reading index from file %s: %v", c.IndexFiles, err) |
| 1528 | } |
| 1529 | return |
| 1530 | } |
| 1531 | |
| 1532 | // Repeatedly update the package directory tree and index. |
| 1533 | for { |
| 1534 | c.initFSTree() |
| 1535 | c.UpdateIndex() |
| 1536 | if c.IndexInterval < 0 { |
| 1537 | return |
| 1538 | } |
| 1539 | delay := 5 * time.Minute // by default, reindex every 5 minutes |
| 1540 | if c.IndexInterval > 0 { |
| 1541 | delay = c.IndexInterval |
| 1542 | } |
| 1543 | time.Sleep(delay) |
| 1544 | } |
| 1545 | } |
| 1546 | |
| 1547 | type countingWriter struct { |
| 1548 | n *int64 |
| 1549 | w io.Writer |
| 1550 | } |
| 1551 | |
| 1552 | func (c countingWriter) Write(p []byte) (n int, err error) { |
| 1553 | n, err = c.w.Write(p) |
| 1554 | *c.n += int64(n) |
| 1555 | return |
| 1556 | } |
| 1557 | |
| 1558 | type byteReader interface { |
| 1559 | io.Reader |
| 1560 | io.ByteReader |
| 1561 | } |
| 1562 | |
| 1563 | type countingReader struct { |
| 1564 | n *int64 |
| 1565 | r byteReader |
| 1566 | } |
| 1567 | |
| 1568 | func (c countingReader) Read(p []byte) (n int, err error) { |
| 1569 | n, err = c.r.Read(p) |
| 1570 | *c.n += int64(n) |
| 1571 | return |
| 1572 | } |
| 1573 | |
| 1574 | func (c countingReader) ReadByte() (b byte, err error) { |
| 1575 | b, err = c.r.ReadByte() |
| 1576 | *c.n += 1 |
| 1577 | return |
| 1578 | } |
| 1579 |
Members