1 | // Copyright 2018 The Go Authors. All rights reserved. |
---|---|
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | //go:build !js && !nacl && !plan9 && !solaris && !windows |
6 | // +build !js,!nacl,!plan9,!solaris,!windows |
7 | |
8 | /* |
9 | Splitdwarf uncompresses and copies the DWARF segment of a Mach-O |
10 | executable into the "dSYM" file expected by lldb and ports of gdb |
11 | on OSX. |
12 | |
13 | Usage: splitdwarf osxMachoFile [ osxDsymFile ] |
14 | |
15 | Unless a dSYM file name is provided on the command line, |
16 | splitdwarf will place it where the OSX tools expect it, in |
17 | "<osxMachoFile>.dSYM/Contents/Resources/DWARF/<osxMachoFile>", |
18 | creating directories as necessary. |
19 | */ |
20 | package main // import "golang.org/x/tools/cmd/splitdwarf" |
21 | |
22 | import ( |
23 | "crypto/sha256" |
24 | "fmt" |
25 | "io" |
26 | "os" |
27 | "path/filepath" |
28 | "strings" |
29 | "syscall" |
30 | |
31 | "golang.org/x/tools/cmd/splitdwarf/internal/macho" |
32 | ) |
33 | |
34 | const ( |
35 | pageAlign = 12 // 4096 = 1 << 12 |
36 | ) |
37 | |
// note writes a diagnostic message to standard error. The message is built
// from format and why using fmt's formatting rules, and a newline is always
// appended.
func note(format string, why ...interface{}) {
	line := fmt.Sprintf(format+"\n", why...)
	os.Stderr.WriteString(line)
}
41 | |
// fail reports a fatal problem: it writes the formatted message, followed by
// a newline, to standard error and then terminates the process with exit
// status 1. It does not return.
func fail(format string, why ...interface{}) {
	fmt.Fprintf(os.Stderr, format+"\n", why...)
	os.Exit(1)
}
46 | |
47 | // splitdwarf inputexe [ outputdwarf ] |
48 | func main() { |
49 | if len(os.Args) < 2 || len(os.Args) > 3 { |
50 | fmt.Printf(` |
51 | Usage: %s input_exe [ output_dsym ] |
52 | Reads the executable input_exe, uncompresses and copies debugging |
53 | information into output_dsym. If output_dsym is not specified, |
54 | the path |
55 | input_exe.dSYM/Contents/Resources/DWARF/input_exe |
56 | is used instead. That is the path that gdb and lldb expect |
57 | on OSX. Input_exe needs a UUID segment; if that is missing, |
58 | then one is created and added. In that case, the permissions |
59 | for input_exe need to allow writing. |
60 | `, os.Args[0]) |
61 | return |
62 | } |
63 | |
64 | // Read input, find DWARF, be sure it looks right |
65 | inputExe := os.Args[1] |
66 | exeFile, err := os.Open(inputExe) |
67 | if err != nil { |
68 | fail("%v", err) |
69 | } |
70 | exeMacho, err := macho.NewFile(exeFile) |
71 | if err != nil { |
72 | fail("(internal) Couldn't create macho, %v", err) |
73 | } |
74 | // Postpone dealing with output till input is known-good |
75 | |
76 | // describe(&exeMacho.FileTOC) |
77 | |
78 | // Offsets into __LINKEDIT: |
79 | // |
80 | // Command LC_SYMTAB = |
81 | // (1) number of symbols at file offset (within link edit section) of 16-byte symbol table entries |
82 | // struct { |
83 | // StringTableIndex uint32 |
84 | // Type, SectionIndex uint8 |
85 | // Description uint16 |
86 | // Value uint64 |
87 | // } |
88 | // |
89 | // (2) string table offset and size. Strings are zero-byte terminated. First must be " ". |
90 | // |
91 | // Command LC_DYSYMTAB = indices within symtab (above), except for IndSym |
92 | // IndSym Offset = file offset (within link edit section) of 4-byte indices within symtab. |
93 | // |
94 | // Section __TEXT.__symbol_stub1. |
95 | // Offset and size (Reserved2) locate and describe a table for this section. |
96 | // Symbols beginning at IndirectSymIndex (Reserved1) (see LC_DYSYMTAB.IndSymOffset) refer to this table. |
97 | // (These table entries are apparently PLTs [Procedure Linkage Table/Trampoline]) |
98 | // |
99 | // Section __DATA.__nl_symbol_ptr. |
100 | // Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset) |
101 | // Some of these symbols appear to be duplicates of other indirect symbols appearing early |
102 | // |
103 | // Section __DATA.__la_symbol_ptr. |
104 | // Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset) |
105 | // Some of these symbols appear to be duplicates of other indirect symbols appearing early |
106 | // |
107 | |
108 | // Create a File for the output dwarf. |
109 | // Copy header, file type is MH_DSYM |
110 | // Copy the relevant load commands |
111 | |
112 | // LoadCmdUuid |
113 | // Symtab -- very abbreviated (Use DYSYMTAB Iextdefsym, Nextdefsym to identify these). |
114 | // Segment __PAGEZERO |
115 | // Segment __TEXT (zero the size, zero the offset of each section) |
116 | // Segment __DATA (zero the size, zero the offset of each section) |
117 | // Segment __LINKEDIT (contains the symbols and strings from Symtab) |
118 | // Segment __DWARF (uncompressed) |
119 | |
120 | var uuid *macho.Uuid |
121 | for _, l := range exeMacho.Loads { |
122 | switch l.Command() { |
123 | case macho.LcUuid: |
124 | uuid = l.(*macho.Uuid) |
125 | } |
126 | } |
127 | |
128 | // Ensure a given load is not nil |
129 | nonnilC := func(l macho.Load, s string) { |
130 | if l == nil { |
131 | fail("input file %s lacks load command %s", inputExe, s) |
132 | } |
133 | } |
134 | |
135 | // Find a segment by name and ensure it is not nil |
136 | nonnilS := func(s string) *macho.Segment { |
137 | l := exeMacho.Segment(s) |
138 | if l == nil { |
139 | fail("input file %s lacks segment %s", inputExe, s) |
140 | } |
141 | return l |
142 | } |
143 | |
144 | newtoc := exeMacho.FileTOC.DerivedCopy(macho.MhDsym, 0) |
145 | |
146 | symtab := exeMacho.Symtab |
147 | dysymtab := exeMacho.Dysymtab // Not appearing in output, but necessary to construct output |
148 | nonnilC(symtab, "symtab") |
149 | nonnilC(dysymtab, "dysymtab") |
150 | text := nonnilS("__TEXT") |
151 | data := nonnilS("__DATA") |
152 | linkedit := nonnilS("__LINKEDIT") |
153 | pagezero := nonnilS("__PAGEZERO") |
154 | |
155 | newtext := text.CopyZeroed() |
156 | newdata := data.CopyZeroed() |
157 | newsymtab := symtab.Copy() |
158 | |
159 | // Linkedit segment contain symbols and strings; |
160 | // Symtab refers to offsets into linkedit. |
161 | // This next bit initializes newsymtab and sets up data structures for the linkedit segment |
162 | linkeditsyms := []macho.Nlist64{} |
163 | linkeditstrings := []string{} |
164 | |
165 | // Linkedit will begin at the second page, i.e., offset is one page from beginning |
166 | // Symbols come first |
167 | linkeditsymbase := uint32(1) << pageAlign |
168 | |
169 | // Strings come second, offset by the number of symbols times their size. |
170 | // Only those symbols from dysymtab.defsym are written into the debugging information. |
171 | linkeditstringbase := linkeditsymbase + exeMacho.FileTOC.SymbolSize()*dysymtab.Nextdefsym |
172 | |
173 | // The first two bytes of the strings are reserved for space, null (' ', \000) |
174 | linkeditstringcur := uint32(2) |
175 | |
176 | newsymtab.Syms = newsymtab.Syms[:0] |
177 | newsymtab.Symoff = linkeditsymbase |
178 | newsymtab.Stroff = linkeditstringbase |
179 | newsymtab.Nsyms = dysymtab.Nextdefsym |
180 | for i := uint32(0); i < dysymtab.Nextdefsym; i++ { |
181 | ii := i + dysymtab.Iextdefsym |
182 | oldsym := symtab.Syms[ii] |
183 | newsymtab.Syms = append(newsymtab.Syms, oldsym) |
184 | |
185 | linkeditsyms = append(linkeditsyms, macho.Nlist64{Name: linkeditstringcur, |
186 | Type: oldsym.Type, Sect: oldsym.Sect, Desc: oldsym.Desc, Value: oldsym.Value}) |
187 | linkeditstringcur += uint32(len(oldsym.Name)) + 1 |
188 | linkeditstrings = append(linkeditstrings, oldsym.Name) |
189 | } |
190 | newsymtab.Strsize = linkeditstringcur |
191 | |
192 | exeNeedsUuid := uuid == nil |
193 | if exeNeedsUuid { |
194 | uuid = &macho.Uuid{macho.UuidCmd{LoadCmd: macho.LcUuid}} |
195 | uuid.Len = uuid.LoadSize(newtoc) |
196 | copy(uuid.Id[0:], contentuuid(&exeMacho.FileTOC)[0:16]) |
197 | uuid.Id[6] = uuid.Id[6]&^0xf0 | 0x40 // version 4 (pseudo-random); see section 4.1.3 |
198 | uuid.Id[8] = uuid.Id[8]&^0xc0 | 0x80 // variant bits; see section 4.1.1 |
199 | } |
200 | newtoc.AddLoad(uuid) |
201 | |
202 | // For the specified segment (assumed to be in exeMacho) make a copy of its |
203 | // sections with appropriate fields zeroed out, and append them to the |
204 | // currently-last segment in newtoc. |
205 | copyZOdSections := func(g *macho.Segment) { |
206 | for i := g.Firstsect; i < g.Firstsect+g.Nsect; i++ { |
207 | s := exeMacho.Sections[i].Copy() |
208 | s.Offset = 0 |
209 | s.Reloff = 0 |
210 | s.Nreloc = 0 |
211 | newtoc.AddSection(s) |
212 | } |
213 | } |
214 | |
215 | newtoc.AddLoad(newsymtab) |
216 | newtoc.AddSegment(pagezero) |
217 | newtoc.AddSegment(newtext) |
218 | copyZOdSections(text) |
219 | newtoc.AddSegment(newdata) |
220 | copyZOdSections(data) |
221 | |
222 | newlinkedit := linkedit.Copy() |
223 | newlinkedit.Offset = uint64(linkeditsymbase) |
224 | newlinkedit.Filesz = uint64(linkeditstringcur) |
225 | newlinkedit.Addr = macho.RoundUp(newdata.Addr+newdata.Memsz, 1<<pageAlign) // Follows data sections in file |
226 | newlinkedit.Memsz = macho.RoundUp(newlinkedit.Filesz, 1<<pageAlign) |
227 | // The rest should copy over fine. |
228 | newtoc.AddSegment(newlinkedit) |
229 | |
230 | dwarf := nonnilS("__DWARF") |
231 | newdwarf := dwarf.CopyZeroed() |
232 | newdwarf.Offset = macho.RoundUp(newlinkedit.Offset+newlinkedit.Filesz, 1<<pageAlign) |
233 | newdwarf.Filesz = dwarf.UncompressedSize(&exeMacho.FileTOC, 1) |
234 | newdwarf.Addr = newlinkedit.Addr + newlinkedit.Memsz // Follows linkedit sections in file. |
235 | newdwarf.Memsz = macho.RoundUp(newdwarf.Filesz, 1<<pageAlign) |
236 | newtoc.AddSegment(newdwarf) |
237 | |
238 | // Map out Dwarf sections (that is, this is section descriptors, not their contents). |
239 | offset := uint32(newdwarf.Offset) |
240 | for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ { |
241 | o := exeMacho.Sections[i] |
242 | s := o.Copy() |
243 | s.Offset = offset |
244 | us := o.UncompressedSize() |
245 | if s.Size < us { |
246 | s.Size = uint64(us) |
247 | s.Align = 0 // This is apparently true for debugging sections; not sure if it generalizes. |
248 | } |
249 | offset += uint32(us) |
250 | if strings.HasPrefix(s.Name, "__z") { |
251 | s.Name = "__" + s.Name[3:] // remove "z" |
252 | } |
253 | s.Reloff = 0 |
254 | s.Nreloc = 0 |
255 | newtoc.AddSection(s) |
256 | } |
257 | |
258 | // Write segments/sections. |
259 | // Only dwarf and linkedit contain anything interesting. |
260 | |
261 | // Memory map the output file to get the buffer directly. |
262 | outDwarf := inputExe + ".dSYM/Contents/Resources/DWARF" |
263 | if len(os.Args) > 2 { |
264 | outDwarf = os.Args[2] |
265 | } else { |
266 | err := os.MkdirAll(outDwarf, 0755) |
267 | if err != nil { |
268 | fail("%v", err) |
269 | } |
270 | outDwarf = filepath.Join(outDwarf, filepath.Base(inputExe)) |
271 | } |
272 | dwarfFile, buffer := CreateMmapFile(outDwarf, int64(newtoc.FileSize())) |
273 | |
274 | // (1) Linkedit segment |
275 | // Symbol table |
276 | offset = uint32(newlinkedit.Offset) |
277 | for i := range linkeditsyms { |
278 | if exeMacho.Magic == macho.Magic64 { |
279 | offset += linkeditsyms[i].Put64(buffer[offset:], newtoc.ByteOrder) |
280 | } else { |
281 | offset += linkeditsyms[i].Put32(buffer[offset:], newtoc.ByteOrder) |
282 | } |
283 | } |
284 | |
285 | // Initial two bytes of string table, followed by actual zero-terminated strings. |
286 | buffer[linkeditstringbase] = ' ' |
287 | buffer[linkeditstringbase+1] = 0 |
288 | offset = linkeditstringbase + 2 |
289 | for _, str := range linkeditstrings { |
290 | for i := 0; i < len(str); i++ { |
291 | buffer[offset] = str[i] |
292 | offset++ |
293 | } |
294 | buffer[offset] = 0 |
295 | offset++ |
296 | } |
297 | |
298 | // (2) DWARF segment |
299 | ioff := newdwarf.Firstsect - dwarf.Firstsect |
300 | for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ { |
301 | s := exeMacho.Sections[i] |
302 | j := i + ioff |
303 | s.PutUncompressedData(buffer[newtoc.Sections[j].Offset:]) |
304 | } |
305 | |
306 | // Because "text" overlaps the header and the loads, write them afterwards, just in case. |
307 | // Write header. |
308 | newtoc.Put(buffer) |
309 | |
310 | err = syscall.Munmap(buffer) |
311 | if err != nil { |
312 | fail("Munmap %s for dwarf output failed, %v", outDwarf, err) |
313 | } |
314 | err = dwarfFile.Close() |
315 | if err != nil { |
316 | fail("Close %s for dwarf output after mmap/munmap failed, %v", outDwarf, err) |
317 | } |
318 | |
319 | if exeNeedsUuid { // Map the original exe, modify the header, and write the UUID command |
320 | hdr := exeMacho.FileTOC.FileHeader |
321 | oldCommandEnd := hdr.SizeCommands + newtoc.HdrSize() |
322 | hdr.NCommands += 1 |
323 | hdr.SizeCommands += uuid.LoadSize(newtoc) |
324 | |
325 | mapf, err := os.OpenFile(inputExe, os.O_RDWR, 0) |
326 | if err != nil { |
327 | fail("Updating UUID in binary failed, %v", err) |
328 | } |
329 | exebuf, err := syscall.Mmap(int(mapf.Fd()), 0, int(macho.RoundUp(uint64(hdr.SizeCommands), 1<<pageAlign)), |
330 | syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED) |
331 | if err != nil { |
332 | fail("Mmap of %s for UUID update failed, %v", inputExe, err) |
333 | } |
334 | _ = hdr.Put(exebuf, newtoc.ByteOrder) |
335 | _ = uuid.Put(exebuf[oldCommandEnd:], newtoc.ByteOrder) |
336 | err = syscall.Munmap(exebuf) |
337 | if err != nil { |
338 | fail("Munmap of %s for UUID update failed, %v", inputExe, err) |
339 | } |
340 | } |
341 | } |
342 | |
343 | // CreateMmapFile creates the file 'outDwarf' of the specified size, mmaps that file, |
344 | // and returns the file descriptor and mapped buffer. |
345 | func CreateMmapFile(outDwarf string, size int64) (*os.File, []byte) { |
346 | dwarfFile, err := os.OpenFile(outDwarf, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666) |
347 | if err != nil { |
348 | fail("Open for mmap failed, %v", err) |
349 | } |
350 | err = os.Truncate(outDwarf, size) |
351 | if err != nil { |
352 | fail("Truncate/extend of %s to %d bytes failed, %v", dwarfFile, size, err) |
353 | } |
354 | buffer, err := syscall.Mmap(int(dwarfFile.Fd()), 0, int(size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED) |
355 | if err != nil { |
356 | fail("Mmap %s for dwarf output update failed, %v", outDwarf, err) |
357 | } |
358 | return dwarfFile, buffer |
359 | } |
360 | |
361 | func describe(exem *macho.FileTOC) { |
362 | note("Type = %s, Flags=0x%x", exem.Type, uint32(exem.Flags)) |
363 | for i, l := range exem.Loads { |
364 | if s, ok := l.(*macho.Segment); ok { |
365 | fmt.Printf("Load %d is Segment %s, offset=0x%x, filesz=%d, addr=0x%x, memsz=%d, nsect=%d\n", i, s.Name, |
366 | s.Offset, s.Filesz, s.Addr, s.Memsz, s.Nsect) |
367 | for j := uint32(0); j < s.Nsect; j++ { |
368 | c := exem.Sections[j+s.Firstsect] |
369 | fmt.Printf(" Section %s, offset=0x%x, size=%d, addr=0x%x, flags=0x%x, nreloc=%d, res1=%d, res2=%d, res3=%d\n", c.Name, c.Offset, c.Size, c.Addr, c.Flags, c.Nreloc, c.Reserved1, c.Reserved2, c.Reserved3) |
370 | } |
371 | } else { |
372 | fmt.Printf("Load %d is %v\n", i, l) |
373 | } |
374 | } |
375 | if exem.SizeCommands != exem.LoadSize() { |
376 | fail("recorded command size %d does not equal computed command size %d", exem.SizeCommands, exem.LoadSize()) |
377 | } else { |
378 | note("recorded command size %d, computed command size %d", exem.SizeCommands, exem.LoadSize()) |
379 | } |
380 | note("File size is %d", exem.FileSize()) |
381 | } |
382 | |
383 | // contentuuid returns a UUID derived from (some of) the content of an executable. |
384 | // specifically included are the non-DWARF sections, specifically excluded are things |
385 | // that surely depend on the presence or absence of DWARF sections (e.g., section |
386 | // numbers, positions with file, number of load commands). |
387 | // (It was considered desirable if this was insensitive to the presence of the |
388 | // __DWARF segment, however because it is not last, it moves other segments, |
389 | // whose contents appear to contain file offset references.) |
390 | func contentuuid(exem *macho.FileTOC) []byte { |
391 | h := sha256.New() |
392 | for _, l := range exem.Loads { |
393 | if l.Command() == macho.LcUuid { |
394 | continue |
395 | } |
396 | if s, ok := l.(*macho.Segment); ok { |
397 | if s.Name == "__DWARF" || s.Name == "__PAGEZERO" { |
398 | continue |
399 | } |
400 | for j := uint32(0); j < s.Nsect; j++ { |
401 | c := exem.Sections[j+s.Firstsect] |
402 | io.Copy(h, c.Open()) |
403 | } |
404 | } // Getting dependence on other load commands right is fiddly. |
405 | } |
406 | return h.Sum(nil) |
407 | } |
408 |
Members