  1. #!/usr/bin/env python3
  2. # Copyright (c) 2018 The Chromium Authors. All rights reserved.
  3. # Use of this source code is governed by a BSD-style license that can be
  4. # found in the LICENSE file.
  5. r"""Summarize the last ninja build, invoked with ninja's -C syntax.
  6. This script is designed to be automatically run after each ninja build in
  7. order to summarize the build's performance. Making build performance information
  8. more visible should make it easier to notice anomalies and opportunities. To use
  9. this script on Windows just set NINJA_SUMMARIZE_BUILD=1 and run autoninja.bat.
  10. On Linux you can get autoninja to invoke this script using this syntax:
  11. $ NINJA_SUMMARIZE_BUILD=1 autoninja -C out/Default/ chrome
  12. You can also call this script directly using ninja's syntax to specify the
  13. output directory of interest:
  14. > python3 post_build_ninja_summary.py -C out/Default
  15. Typical output looks like this:
  16. >ninja -C out\debug_component base
  17. ninja.exe -C out\debug_component base -j 960 -l 48 -d keeprsp
  18. ninja: Entering directory `out\debug_component'
  19. [1 processes, 1/1 @ 0.3/s : 3.092s ] Regenerating ninja files
  20. Longest build steps:
  21. 0.1 weighted s to build obj/base/base/trace_log.obj (6.7 s elapsed time)
  22. 0.2 weighted s to build nasm.exe, nasm.exe.pdb (0.2 s elapsed time)
  23. 0.3 weighted s to build obj/base/base/win_util.obj (12.4 s elapsed time)
  24. 1.2 weighted s to build base.dll, base.dll.lib (1.2 s elapsed time)
  25. Time by build-step type:
  26. 0.0 s weighted time to generate 6 .lib files (0.3 s elapsed time sum)
  27. 0.1 s weighted time to generate 25 .stamp files (1.2 s elapsed time sum)
  28. 0.2 s weighted time to generate 20 .o files (2.8 s elapsed time sum)
  29. 1.7 s weighted time to generate 4 PEFile (linking) files (2.0 s elapsed
  30. time sum)
  31. 23.9 s weighted time to generate 770 .obj files (974.8 s elapsed time sum)
  32. 26.1 s weighted time (982.9 s elapsed time sum, 37.7x parallelism)
  33. 839 build steps completed, average of 32.17/s
  34. If no gn clean has been done then results will be for the last non-NULL
  35. invocation of ninja. Ideas for future statistics, and implementations are
  36. appreciated.
  37. The "weighted" time is the elapsed time of each build step divided by the number
  38. of tasks that were running in parallel. This makes it an excellent approximation
  39. of how "important" a slow step was. A link that is entirely or mostly serialized
  40. will have a weighted time that is the same or similar to its elapsed time. A
  41. compile that runs in parallel with 999 other compiles will have a weighted time
  42. that is tiny."""
  43. import argparse
  44. import errno
  45. import fnmatch
  46. import os
  47. import subprocess
  48. import sys
# Directory containing this script; used below to locate the sibling siso.py.
THIS_DIR = os.path.dirname(__file__)
# The number of long build times to report:
long_count = 10
# The number of long times by extension to report
# (main() may increase this to make room for user-specified step types).
long_ext_count = 10
  54. class Target:
  55. """Represents a single line read for a .ninja_log file."""
  56. def __init__(self, start, end):
  57. """Creates a target object by passing in the start/end times in seconds
  58. as a float."""
  59. self.start = start
  60. self.end = end
  61. # A list of targets, appended to by the owner of this object.
  62. self.targets = []
  63. self.weighted_duration = 0.0
  64. def Duration(self):
  65. """Returns the task duration in seconds as a float."""
  66. return self.end - self.start
  67. def SetWeightedDuration(self, weighted_duration):
  68. """Sets the duration, in seconds, passed in as a float."""
  69. self.weighted_duration = weighted_duration
  70. def WeightedDuration(self):
  71. """Returns the task's weighted duration in seconds as a float.
  72. Weighted_duration takes the elapsed time of the task and divides it
  73. by how many other tasks were running at the same time. Thus, it
  74. represents the approximate impact of this task on the total build time,
  75. with serialized or serializing steps typically ending up with much
  76. longer weighted durations.
  77. weighted_duration should always be the same or shorter than duration.
  78. """
  79. # Allow for modest floating-point errors
  80. epsilon = 0.000002
  81. if self.weighted_duration > self.Duration() + epsilon:
  82. print("%s > %s?" % (self.weighted_duration, self.Duration()))
  83. assert self.weighted_duration <= self.Duration() + epsilon
  84. return self.weighted_duration
  85. def DescribeTargets(self):
  86. """Returns a printable string that summarizes the targets."""
  87. # Some build steps generate dozens of outputs - handle them sanely.
  88. # The max_length was chosen so that it can fit most of the long
  89. # single-target names, while minimizing word wrapping.
  90. result = ", ".join(self.targets)
  91. max_length = 65
  92. if len(result) > max_length:
  93. result = result[:max_length] + "..."
  94. return result
# Copied with some modifications from ninjatracing
def ReadTargets(log, show_all):
    """Reads all targets from a .ninja_log file handle |log|.

    The result is a list of Target objects, one per unique command hash,
    in no particular order. When |show_all| is false only records from the
    most recent build are kept (see the reset logic below).
    """
    header = log.readline()
    # Handle empty ninja_log gracefully by silently returning an empty list of
    # targets.
    if not header:
        return []
    assert header in ("# ninja log v5\n", "# ninja log v6\n"), (
        "unrecognized ninja log version %r" % header)
    targets_dict = {}
    last_end_seen = 0.0
    for line in log:
        # Each record is tab-separated: start, end, restat, name, cmdhash.
        parts = line.strip().split("\t")
        if len(parts) != 5:
            # If ninja.exe is rudely halted then the .ninja_log file may be
            # corrupt. Silently continue.
            continue
        start, end, _, name, cmdhash = parts  # Ignore restat.
        # Convert from integral milliseconds to float seconds.
        start = int(start) / 1000.0
        end = int(end) / 1000.0
        if not show_all and end < last_end_seen:
            # An earlier time stamp means that this step is the first in a new
            # build, possibly an incremental build. Throw away the previous
            # data so that this new build will be displayed independently.
            # This has to be done by comparing end times because records are
            # written to the .ninja_log file when commands complete, so end
            # times are guaranteed to be in order, but start times are not.
            targets_dict = {}
        target = None
        if cmdhash in targets_dict:
            target = targets_dict[cmdhash]
            if not show_all and (target.start != start or target.end != end):
                # If several builds in a row just run one or two build steps
                # then the end times may not go backwards so the last build may
                # not be detected as such. However in many cases there will be a
                # build step repeated in the two builds and the changed
                # start/stop points for that command, identified by the hash,
                # can be used to detect and reset the target dictionary.
                targets_dict = {}
                target = None
        if not target:
            targets_dict[cmdhash] = target = Target(start, end)
        last_end_seen = end
        # A step can list multiple outputs; all share one Target record.
        target.targets.append(name)
    return list(targets_dict.values())
  143. def GetExtension(target, extra_patterns):
  144. """Return the file extension that best represents a target.
  145. For targets that generate multiple outputs it is important to return a
  146. consistent 'canonical' extension. Ultimately the goal is to group build
  147. steps by type."""
  148. for output in target.targets:
  149. if extra_patterns:
  150. for fn_pattern in extra_patterns.split(";"):
  151. if fnmatch.fnmatch(output, "*" + fn_pattern + "*"):
  152. return fn_pattern
  153. # Not a true extension, but a good grouping.
  154. if output.endswith("type_mappings"):
  155. extension = "type_mappings"
  156. break
  157. # Capture two extensions if present. For example: file.javac.jar should
  158. # be distinguished from file.interface.jar.
  159. root, ext1 = os.path.splitext(output)
  160. _, ext2 = os.path.splitext(root)
  161. extension = ext2 + ext1 # Preserve the order in the file name.
  162. if len(extension) == 0:
  163. extension = "(no extension found)"
  164. if ext1 in [".pdb", ".dll", ".exe"]:
  165. extension = "PEFile (linking)"
  166. # Make sure that .dll and .exe are grouped together and that the
  167. # .dll.lib files don't cause these to be listed as libraries
  168. break
  169. if ext1 in [".so", ".TOC"]:
  170. extension = ".so (linking)"
  171. # Attempt to identify linking, avoid identifying as '.TOC'
  172. break
  173. # Make sure .obj files don't get categorized as mojo files
  174. if ext1 in [".obj", ".o"]:
  175. break
  176. # Jars are the canonical output of java targets.
  177. if ext1 == ".jar":
  178. break
  179. # Normalize all mojo related outputs to 'mojo'.
  180. if output.count(".mojom") > 0:
  181. extension = "mojo"
  182. break
  183. return extension
  184. def SummarizeEntries(entries, extra_step_types, elapsed_time_sorting):
  185. """Print a summary of the passed in list of Target objects."""
  186. # Create a list that is in order by time stamp and has entries for the
  187. # beginning and ending of each build step (one time stamp may have multiple
  188. # entries due to multiple steps starting/stopping at exactly the same time).
  189. # Iterate through this list, keeping track of which tasks are running at all
  190. # times. At each time step calculate a running total for weighted time so
  191. # that when each task ends its own weighted time can easily be calculated.
  192. task_start_stop_times = []
  193. earliest = -1
  194. latest = 0
  195. total_cpu_time = 0
  196. for target in entries:
  197. if earliest < 0 or target.start < earliest:
  198. earliest = target.start
  199. if target.end > latest:
  200. latest = target.end
  201. total_cpu_time += target.Duration()
  202. task_start_stop_times.append((target.start, "start", target))
  203. task_start_stop_times.append((target.end, "stop", target))
  204. length = latest - earliest
  205. weighted_total = 0.0
  206. # Sort by the time/type records and ignore |target|
  207. task_start_stop_times.sort(key=lambda times: times[:2])
  208. # Now we have all task start/stop times sorted by when they happen. If a
  209. # task starts and stops on the same time stamp then the start will come
  210. # first because of the alphabet, which is important for making this work
  211. # correctly.
  212. # Track the tasks which are currently running.
  213. running_tasks = {}
  214. # Record the time we have processed up to so we know how to calculate time
  215. # deltas.
  216. last_time = task_start_stop_times[0][0]
  217. # Track the accumulated weighted time so that it can efficiently be added
  218. # to individual tasks.
  219. last_weighted_time = 0.0
  220. # Scan all start/stop events.
  221. for event in task_start_stop_times:
  222. time, action_name, target = event
  223. # Accumulate weighted time up to now.
  224. num_running = len(running_tasks)
  225. if num_running > 0:
  226. # Update the total weighted time up to this moment.
  227. last_weighted_time += (time - last_time) / float(num_running)
  228. if action_name == "start":
  229. # Record the total weighted task time when this task starts.
  230. running_tasks[target] = last_weighted_time
  231. if action_name == "stop":
  232. # Record the change in the total weighted task time while this task
  233. # ran.
  234. weighted_duration = last_weighted_time - running_tasks[target]
  235. target.SetWeightedDuration(weighted_duration)
  236. weighted_total += weighted_duration
  237. del running_tasks[target]
  238. last_time = time
  239. assert len(running_tasks) == 0
  240. # Warn if the sum of weighted times is off by more than half a second.
  241. if abs(length - weighted_total) > 500:
  242. print("Warning: Possible corrupt ninja log, results may be "
  243. "untrustworthy. Length = %.3f, weighted total = %.3f" %
  244. (length, weighted_total))
  245. # Print the slowest build steps:
  246. print(" Longest build steps:")
  247. if elapsed_time_sorting:
  248. entries.sort(key=lambda x: x.Duration())
  249. else:
  250. entries.sort(key=lambda x: x.WeightedDuration())
  251. for target in entries[-long_count:]:
  252. print(" %8.1f weighted s to build %s (%.1f s elapsed time)" % (
  253. target.WeightedDuration(),
  254. target.DescribeTargets(),
  255. target.Duration(),
  256. ))
  257. # Sum up the time by file extension/type of the output file
  258. count_by_ext = {}
  259. time_by_ext = {}
  260. weighted_time_by_ext = {}
  261. # Scan through all of the targets to build up per-extension statistics.
  262. for target in entries:
  263. extension = GetExtension(target, extra_step_types)
  264. time_by_ext[extension] = (time_by_ext.get(extension, 0) +
  265. target.Duration())
  266. weighted_time_by_ext[extension] = (
  267. weighted_time_by_ext.get(extension, 0) + target.WeightedDuration())
  268. count_by_ext[extension] = count_by_ext.get(extension, 0) + 1
  269. print(" Time by build-step type:")
  270. # Copy to a list with extension name and total time swapped, to (time, ext)
  271. if elapsed_time_sorting:
  272. weighted_time_by_ext_sorted = sorted(
  273. (y, x) for (x, y) in time_by_ext.items())
  274. else:
  275. weighted_time_by_ext_sorted = sorted(
  276. (y, x) for (x, y) in weighted_time_by_ext.items())
  277. # Print the slowest build target types:
  278. for time, extension in weighted_time_by_ext_sorted[-long_ext_count:]:
  279. print(" %8.1f s weighted time to generate %d %s files "
  280. "(%1.1f s elapsed time sum)" % (
  281. time,
  282. count_by_ext[extension],
  283. extension,
  284. time_by_ext[extension],
  285. ))
  286. print(" %.1f s weighted time (%.1f s elapsed time sum, %1.1fx "
  287. "parallelism)" %
  288. (length, total_cpu_time, total_cpu_time * 1.0 / length))
  289. print(" %d build steps completed, average of %1.2f/s" %
  290. (len(entries), len(entries) / (length)))
  291. def main():
  292. log_file = ".ninja_log"
  293. metrics_file = "siso_metrics.json"
  294. parser = argparse.ArgumentParser()
  295. parser.add_argument("-C", dest="build_directory", help="Build directory.")
  296. parser.add_argument(
  297. "-s",
  298. "--step-types",
  299. help="semicolon separated fnmatch patterns for build-step grouping",
  300. )
  301. parser.add_argument(
  302. "-e",
  303. "--elapsed_time_sorting",
  304. default=False,
  305. action="store_true",
  306. help="Sort output by elapsed time instead of weighted time",
  307. )
  308. parser.add_argument("--log-file",
  309. help="specific ninja log file to analyze.")
  310. args, _extra_args = parser.parse_known_args()
  311. if args.build_directory:
  312. log_file = os.path.join(args.build_directory, log_file)
  313. metrics_file = os.path.join(args.build_directory, metrics_file)
  314. if args.log_file:
  315. log_file = args.log_file
  316. if not args.step_types:
  317. # Offer a convenient way to add extra step types automatically,
  318. # including when this script is run by autoninja. get() returns None if
  319. # the variable isn't set.
  320. args.step_types = os.environ.get("chromium_step_types")
  321. if args.step_types:
  322. # Make room for the extra build types.
  323. global long_ext_count
  324. long_ext_count += len(args.step_types.split(";"))
  325. if os.path.exists(metrics_file):
  326. cmd = [sys.executable, os.path.join(THIS_DIR, "siso.py")]
  327. cmd.extend(["metrics", "summary"])
  328. if args.build_directory:
  329. cmd.extend(["-C", args.build_directory])
  330. if args.step_types:
  331. cmd.extend(["--step_types", args.step_types])
  332. if args.elapsed_time_sorting:
  333. cmd.append("--elapsed_time_sorting")
  334. subprocess.run(cmd, check=True)
  335. return 0
  336. try:
  337. with open(log_file, "r") as log:
  338. entries = ReadTargets(log, False)
  339. if entries:
  340. SummarizeEntries(entries, args.step_types,
  341. args.elapsed_time_sorting)
  342. except IOError:
  343. print("Log file %r not found, no build summary created." % log_file)
  344. return errno.ENOENT
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code
    # (None on success -> 0; errno.ENOENT when the log file is missing).
    sys.exit(main())