From f16786ba7971b3599c150f6ad867ec4b80e9b392 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20M=C3=BCller?= Date: Tue, 1 Mar 2022 22:53:18 +0100 Subject: [PATCH] mixxx: upgrade 2.3.1 -> 2.3.2 to fix build with ffmpeg5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Andreas Müller --- recipes-musicians/mixxx/mixxx.bb | 11 +- ...1-Update-Benchmark-library-to-v1.6.0.patch | 5933 ----------------- 2 files changed, 4 insertions(+), 5940 deletions(-) delete mode 100644 recipes-musicians/mixxx/mixxx/0001-Update-Benchmark-library-to-v1.6.0.patch diff --git a/recipes-musicians/mixxx/mixxx.bb b/recipes-musicians/mixxx/mixxx.bb index c17ddee..c7cc8d5 100644 --- a/recipes-musicians/mixxx/mixxx.bb +++ b/recipes-musicians/mixxx/mixxx.bb @@ -1,7 +1,7 @@ SUMMARY = "Qt based DJ software" HOMEPAGE = "http://mixxx.org/" LICENSE = "GPLv2+" -LIC_FILES_CHKSUM = "file://LICENSE;md5=e5323335634095f8bdd15f6a5c5c5865" +LIC_FILES_CHKSUM = "file://LICENSE;md5=b3ce5d18079fa79804cd62469a51d176" inherit cmake_qt5 gtk-icon-cache features_check @@ -44,13 +44,10 @@ DEPENDS += " \ # causes segfault trying to find debug libs # gperftools -SRC_URI = " \ - git://github.com/mixxxdj/${BPN}.git;branch=main;protocol=https \ - file://0001-Update-Benchmark-library-to-v1.6.0.patch \ -" -SRCREV = "8acb633220024222504cddcd1f5ea26e659fbcc7" +SRC_URI = "git://github.com/mixxxdj/${BPN}.git;branch=main;protocol=https" +SRCREV = "96fc5dd217a81d0e2327a52f564f7aea7d5c2c43" S = "${WORKDIR}/git" -PV = "2.3.1" +PV = "2.3.2" EXTRA_OECMAKE += " \ -DSHOUTCAST=OFF \ diff --git a/recipes-musicians/mixxx/mixxx/0001-Update-Benchmark-library-to-v1.6.0.patch b/recipes-musicians/mixxx/mixxx/0001-Update-Benchmark-library-to-v1.6.0.patch deleted file mode 100644 index 52a46fc..0000000 --- a/recipes-musicians/mixxx/mixxx/0001-Update-Benchmark-library-to-v1.6.0.patch +++ /dev/null @@ -1,5933 +0,0 @@ -From e590711ca2a0882e331162d14405e25c49f7b774 Mon Sep 17 00:00:00 2001 -From: Uwe Klotz -Date: Tue, 23 Nov 2021 23:36:21 +0100 -Subject: [PATCH] Update Benchmark library to v1.6.0 - - -Upstream-Status: Accepted [https://github.com/mixxxdj/mixxx/commit/e590711ca2a0882e331162d14405e25c49f7b774] ---- - lib/benchmark/AUTHORS | 4 + - lib/benchmark/CMakeLists.txt | 50 +- - lib/benchmark/COMMIT | 2 +- - lib/benchmark/CONTRIBUTING.md | 58 ++ - lib/benchmark/CONTRIBUTORS | 7 + - lib/benchmark/README.md | 216 +++++ - lib/benchmark/cmake/AddCXXCompilerFlag.cmake | 12 +- - lib/benchmark/cmake/CXXFeatureCheck.cmake | 5 + - lib/benchmark/cmake/GetGitVersion.cmake | 22 +- - lib/benchmark/cmake/GoogleTest.cmake | 3 + - lib/benchmark/cmake/GoogleTest.cmake.in | 2 +- - lib/benchmark/cmake/Modules/FindPFM.cmake | 26 + - lib/benchmark/cmake/benchmark.pc.in | 4 +- - lib/benchmark/include/benchmark/benchmark.h | 250 ++++-- - lib/benchmark/src/CMakeLists.txt | 14 +- - lib/benchmark/src/benchmark.cc | 258 ++++-- - lib/benchmark/src/benchmark_api_internal.cc | 93 ++- - lib/benchmark/src/benchmark_api_internal.h | 71 +- - lib/benchmark/src/benchmark_register.cc | 209 +++-- - lib/benchmark/src/benchmark_register.h | 23 +- - lib/benchmark/src/benchmark_runner.cc | 401 +++++----- - lib/benchmark/src/benchmark_runner.h | 73 +- - lib/benchmark/src/check.h | 39 +- - lib/benchmark/src/colorprint.cc | 4 +- - lib/benchmark/src/commandlineflags.cc | 60 +- - lib/benchmark/src/commandlineflags.h | 73 +- - lib/benchmark/src/complexity.cc | 19 +- - lib/benchmark/src/console_reporter.cc | 21 +- - lib/benchmark/src/csv_reporter.cc | 3 +- - lib/benchmark/src/cycleclock.h | 67 +- - lib/benchmark/src/internal_macros.h | 10 +- - lib/benchmark/src/json_reporter.cc | 100 ++- - lib/benchmark/src/log.h | 2 +- - lib/benchmark/src/mutex.h | 44 +- - lib/benchmark/src/perf_counters.cc | 132 +++ - lib/benchmark/src/perf_counters.h | 172 ++++ - lib/benchmark/src/re.h | 2 +- - lib/benchmark/src/reporter.cc | 15 +- - lib/benchmark/src/sleep.cc | 16 + - lib/benchmark/src/statistics.cc | 36 +- - lib/benchmark/src/statistics.h | 1 + - lib/benchmark/src/string_util.cc | 16 + - lib/benchmark/src/string_util.h | 2 + - lib/benchmark/src/sysinfo.cc | 58 +- - lib/benchmark/src/thread_timer.h | 8 +- - lib/benchmark/src/timers.cc | 80 +- - lib/benchmark/tools/compare.py | 37 +- - lib/benchmark/tools/gbench/report.py | 799 +++++++++++++++---- - lib/benchmark/tools/gbench/util.py | 23 +- - 49 files changed, 2754 insertions(+), 888 deletions(-) - create mode 100644 lib/benchmark/CONTRIBUTING.md - create mode 100644 lib/benchmark/README.md - create mode 100644 lib/benchmark/cmake/Modules/FindPFM.cmake - create mode 100644 lib/benchmark/src/perf_counters.cc - create mode 100644 lib/benchmark/src/perf_counters.h - -diff --git a/lib/benchmark/AUTHORS b/lib/benchmark/AUTHORS -index 89205a1adb..838dd4f5bd 100644 ---- a/lib/benchmark/AUTHORS -+++ b/lib/benchmark/AUTHORS -@@ -13,6 +13,7 @@ Alex Steele - Andriy Berestovskyy - Arne Beer - Carto -+Christian Wassermann - Christopher Seymour - Colin Braley - Daniel Harvey -@@ -42,6 +43,7 @@ Matt Clarkson - Maxim Vafin - MongoDB Inc. - Nick Hutchinson -+Norman Heino - Oleksandr Sochka - Ori Livneh - Paul Redmond -@@ -51,6 +53,8 @@ Sayan Bhattacharjee - Shuo Chen - Steinar H. Gunderson - Stripe, Inc. -+Tobias Schmidt - Yixuan Qiu - Yusuke Suzuki - Zbigniew Skowron -+Min-Yih Hsu -diff --git a/lib/benchmark/CMakeLists.txt b/lib/benchmark/CMakeLists.txt -index 67c0b70015..49f2ae2a0f 100644 ---- a/lib/benchmark/CMakeLists.txt -+++ b/lib/benchmark/CMakeLists.txt -@@ -13,7 +13,7 @@ foreach(p - endif() - endforeach() - --project (benchmark CXX) -+project (benchmark VERSION 1.6.0 LANGUAGES CXX) - - option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON) - option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON) -@@ -34,7 +34,20 @@ option(BENCHMARK_DOWNLOAD_DEPENDENCIES "Allow the downloading and in-tree buildi - # in cases where it is not possible to build or find a valid version of gtest. - option(BENCHMARK_ENABLE_GTEST_TESTS "Enable building the unit tests which depend on gtest" ON) - -+option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" OFF) -+ - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) -+if(MSVC) -+ # As of CMake 3.18, CMAKE_SYSTEM_PROCESSOR is not set properly for MSVC and -+ # cross-compilation (e.g. Host=x86_64, target=aarch64) requires using the -+ # undocumented, but working variable. -+ # See https://gitlab.kitware.com/cmake/cmake/-/issues/15170 -+ set(CMAKE_SYSTEM_PROCESSOR ${MSVC_CXX_ARCHITECTURE_ID}) -+ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM") -+ set(CMAKE_CROSSCOMPILING TRUE) -+ endif() -+endif() -+ - set(ENABLE_ASSEMBLY_TESTS_DEFAULT OFF) - function(should_enable_assembly_tests) - if(CMAKE_BUILD_TYPE) -@@ -81,8 +94,14 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") - include(GetGitVersion) - get_git_version(GIT_VERSION) - -+# If no git version can be determined, use the version -+# from the project() command -+if ("${GIT_VERSION}" STREQUAL "0.0.0") -+ set(VERSION "${benchmark_VERSION}") -+else() -+ set(VERSION "${GIT_VERSION}") -+endif() - # Tell the user what versions we are using --string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" VERSION ${GIT_VERSION}) - message(STATUS "Version: ${VERSION}") - - # The version of the libraries -@@ -144,9 +163,12 @@ else() - add_cxx_compiler_flag(-Werror RELEASE) - add_cxx_compiler_flag(-Werror RELWITHDEBINFO) - add_cxx_compiler_flag(-Werror MINSIZEREL) -- # Disabled until googletest (gmock) stops emitting variadic macro warnings -- #add_cxx_compiler_flag(-pedantic) -- #add_cxx_compiler_flag(-pedantic-errors) -+ if (NOT BENCHMARK_ENABLE_TESTING) -+ # Disable warning when compiling tests as gtest does not use 'override'. -+ add_cxx_compiler_flag(-Wsuggest-override) -+ endif() -+ add_cxx_compiler_flag(-pedantic) -+ add_cxx_compiler_flag(-pedantic-errors) - add_cxx_compiler_flag(-Wshorten-64-to-32) - add_cxx_compiler_flag(-fstrict-aliasing) - # Disable warnings regarding deprecated parts of the library while building -@@ -194,6 +216,7 @@ else() - # Link time optimisation - if (BENCHMARK_ENABLE_LTO) - add_cxx_compiler_flag(-flto) -+ add_cxx_compiler_flag(-Wno-lto-type-mismatch) - if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - find_program(GCC_AR gcc-ar) - if (GCC_AR) -@@ -245,11 +268,17 @@ if (BENCHMARK_USE_LIBCXX) - endif() - endif(BENCHMARK_USE_LIBCXX) - -+set(EXTRA_CXX_FLAGS "") -+if (WIN32 AND "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") -+ # Clang on Windows fails to compile the regex feature check under C++11 -+ set(EXTRA_CXX_FLAGS "-DCMAKE_CXX_STANDARD=14") -+endif() -+ - # C++ feature checks - # Determine the correct regular expression engine to use --cxx_feature_check(STD_REGEX) --cxx_feature_check(GNU_POSIX_REGEX) --cxx_feature_check(POSIX_REGEX) -+cxx_feature_check(STD_REGEX ${EXTRA_CXX_FLAGS}) -+cxx_feature_check(GNU_POSIX_REGEX ${EXTRA_CXX_FLAGS}) -+cxx_feature_check(POSIX_REGEX ${EXTRA_CXX_FLAGS}) - if(NOT HAVE_STD_REGEX AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX) - message(FATAL_ERROR "Failed to determine the source files for the regular expression backend") - endif() -@@ -257,11 +286,16 @@ if (NOT BENCHMARK_ENABLE_EXCEPTIONS AND HAVE_STD_REGEX - AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX) - message(WARNING "Using std::regex with exceptions disabled is not fully supported") - endif() -+ - cxx_feature_check(STEADY_CLOCK) - # Ensure we have pthreads - set(THREADS_PREFER_PTHREAD_FLAG ON) - find_package(Threads REQUIRED) - -+if (BENCHMARK_ENABLE_LIBPFM) -+ find_package(PFM) -+endif() -+ - # Set up directories - include_directories(${PROJECT_SOURCE_DIR}/include) - -diff --git a/lib/benchmark/COMMIT b/lib/benchmark/COMMIT -index 7ca3e2e4ca..2d75860141 100644 ---- a/lib/benchmark/COMMIT -+++ b/lib/benchmark/COMMIT -@@ -1 +1 @@ --e30cac6b06cf05416a9358df8be868ab01602a68 -+f91b6b42b1b9854772a90ae9501464a161707d1e -diff --git a/lib/benchmark/CONTRIBUTING.md b/lib/benchmark/CONTRIBUTING.md -new file mode 100644 -index 0000000000..43de4c9d47 ---- /dev/null -+++ b/lib/benchmark/CONTRIBUTING.md -@@ -0,0 +1,58 @@ -+# How to contribute # -+ -+We'd love to accept your patches and contributions to this project. There are -+a just a few small guidelines you need to follow. -+ -+ -+## Contributor License Agreement ## -+ -+Contributions to any Google project must be accompanied by a Contributor -+License Agreement. This is not a copyright **assignment**, it simply gives -+Google permission to use and redistribute your contributions as part of the -+project. -+ -+ * If you are an individual writing original source code and you're sure you -+ own the intellectual property, then you'll need to sign an [individual -+ CLA][]. -+ -+ * If you work for a company that wants to allow you to contribute your work, -+ then you'll need to sign a [corporate CLA][]. -+ -+You generally only need to submit a CLA once, so if you've already submitted -+one (even if it was for a different project), you probably don't need to do it -+again. -+ -+[individual CLA]: https://developers.google.com/open-source/cla/individual -+[corporate CLA]: https://developers.google.com/open-source/cla/corporate -+ -+Once your CLA is submitted (or if you already submitted one for -+another Google project), make a commit adding yourself to the -+[AUTHORS][] and [CONTRIBUTORS][] files. This commit can be part -+of your first [pull request][]. -+ -+[AUTHORS]: AUTHORS -+[CONTRIBUTORS]: CONTRIBUTORS -+ -+ -+## Submitting a patch ## -+ -+ 1. It's generally best to start by opening a new issue describing the bug or -+ feature you're intending to fix. Even if you think it's relatively minor, -+ it's helpful to know what people are working on. Mention in the initial -+ issue that you are planning to work on that bug or feature so that it can -+ be assigned to you. -+ -+ 1. Follow the normal process of [forking][] the project, and setup a new -+ branch to work in. It's important that each group of changes be done in -+ separate branches in order to ensure that a pull request only includes the -+ commits related to that bug or feature. -+ -+ 1. Do your best to have [well-formed commit messages][] for each change. -+ This provides consistency throughout the project, and ensures that commit -+ messages are able to be formatted properly by various git tools. -+ -+ 1. Finally, push the commits to your fork and submit a [pull request][]. -+ -+[forking]: https://help.github.com/articles/fork-a-repo -+[well-formed commit messages]: http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html -+[pull request]: https://help.github.com/articles/creating-a-pull-request -diff --git a/lib/benchmark/CONTRIBUTORS b/lib/benchmark/CONTRIBUTORS -index 88f7eee06c..7489731de5 100644 ---- a/lib/benchmark/CONTRIBUTORS -+++ b/lib/benchmark/CONTRIBUTORS -@@ -22,12 +22,14 @@ - # - # Please keep the list sorted. - -+Abhina Sreeskantharajan - Albert Pretorius - Alex Steele - Andriy Berestovskyy - Arne Beer - Billy Robert O'Neal III - Chris Kennelly -+Christian Wassermann - Christopher Seymour - Colin Braley - Cyrille Faucheux -@@ -40,6 +42,7 @@ Eric Backus - Eric Fiselier - Eugene Zhuk - Evgeny Safronov -+Fanbo Meng - Federico Ficarelli - Felix Homann - Geoffrey Martin-Noble -@@ -59,6 +62,7 @@ Lei Xu - Matt Clarkson - Maxim Vafin - Nick Hutchinson -+Norman Heino - Oleksandr Sochka - Ori Livneh - Pascal Leroy -@@ -71,8 +75,11 @@ Robert Guo - Roman Lebedev - Sayan Bhattacharjee - Shuo Chen -+Steven Wan -+Tobias Schmidt - Tobias Ulvgård - Tom Madams - Yixuan Qiu - Yusuke Suzuki - Zbigniew Skowron -+Min-Yih Hsu -diff --git a/lib/benchmark/README.md b/lib/benchmark/README.md -new file mode 100644 -index 0000000000..7b81d960fc ---- /dev/null -+++ b/lib/benchmark/README.md -@@ -0,0 +1,216 @@ -+# Benchmark -+ -+[![build-and-test](https://github.com/google/benchmark/workflows/build-and-test/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Abuild-and-test) -+[![bazel](https://github.com/google/benchmark/actions/workflows/bazel.yml/badge.svg)](https://github.com/google/benchmark/actions/workflows/bazel.yml) -+[![pylint](https://github.com/google/benchmark/workflows/pylint/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Apylint) -+[![test-bindings](https://github.com/google/benchmark/workflows/test-bindings/badge.svg)](https://github.com/google/benchmark/actions?query=workflow%3Atest-bindings) -+ -+[![Build Status](https://travis-ci.org/google/benchmark.svg?branch=master)](https://travis-ci.org/google/benchmark) -+[![Coverage Status](https://coveralls.io/repos/google/benchmark/badge.svg)](https://coveralls.io/r/google/benchmark) -+ -+ -+A library to benchmark code snippets, similar to unit tests. Example: -+ -+```c++ -+#include -+ -+static void BM_SomeFunction(benchmark::State& state) { -+ // Perform setup here -+ for (auto _ : state) { -+ // This code gets timed -+ SomeFunction(); -+ } -+} -+// Register the function as a benchmark -+BENCHMARK(BM_SomeFunction); -+// Run the benchmark -+BENCHMARK_MAIN(); -+``` -+ -+## Getting Started -+ -+To get started, see [Requirements](#requirements) and -+[Installation](#installation). See [Usage](#usage) for a full example and the -+[User Guide](docs/user_guide.md) for a more comprehensive feature overview. -+ -+It may also help to read the [Google Test documentation](https://github.com/google/googletest/blob/master/docs/primer.md) -+as some of the structural aspects of the APIs are similar. -+ -+## Resources -+ -+[Discussion group](https://groups.google.com/d/forum/benchmark-discuss) -+ -+IRC channels: -+* [libera](https://libera.chat) #benchmark -+ -+[Additional Tooling Documentation](docs/tools.md) -+ -+[Assembly Testing Documentation](docs/AssemblyTests.md) -+ -+## Requirements -+ -+The library can be used with C++03. However, it requires C++11 to build, -+including compiler and standard library support. -+ -+The following minimum versions are required to build the library: -+ -+* GCC 4.8 -+* Clang 3.4 -+* Visual Studio 14 2015 -+* Intel 2015 Update 1 -+ -+See [Platform-Specific Build Instructions](docs/platform_specific_build_instructions.md). -+ -+## Installation -+ -+This describes the installation process using cmake. As pre-requisites, you'll -+need git and cmake installed. -+ -+_See [dependencies.md](docs/dependencies.md) for more details regarding supported -+versions of build tools._ -+ -+```bash -+# Check out the library. -+$ git clone https://github.com/google/benchmark.git -+# Go to the library root directory -+$ cd benchmark -+# Make a build directory to place the build output. -+$ cmake -E make_directory "build" -+# Generate build system files with cmake, and download any dependencies. -+$ cmake -E chdir "build" cmake -DBENCHMARK_DOWNLOAD_DEPENDENCIES=on -DCMAKE_BUILD_TYPE=Release ../ -+# or, starting with CMake 3.13, use a simpler form: -+# cmake -DCMAKE_BUILD_TYPE=Release -S . -B "build" -+# Build the library. -+$ cmake --build "build" --config Release -+``` -+This builds the `benchmark` and `benchmark_main` libraries and tests. -+On a unix system, the build directory should now look something like this: -+ -+``` -+/benchmark -+ /build -+ /src -+ /libbenchmark.a -+ /libbenchmark_main.a -+ /test -+ ... -+``` -+ -+Next, you can run the tests to check the build. -+ -+```bash -+$ cmake -E chdir "build" ctest --build-config Release -+``` -+ -+If you want to install the library globally, also run: -+ -+``` -+sudo cmake --build "build" --config Release --target install -+``` -+ -+Note that Google Benchmark requires Google Test to build and run the tests. This -+dependency can be provided two ways: -+ -+* Checkout the Google Test sources into `benchmark/googletest`. -+* Otherwise, if `-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON` is specified during -+ configuration as above, the library will automatically download and build -+ any required dependencies. -+ -+If you do not wish to build and run the tests, add `-DBENCHMARK_ENABLE_GTEST_TESTS=OFF` -+to `CMAKE_ARGS`. -+ -+### Debug vs Release -+ -+By default, benchmark builds as a debug library. You will see a warning in the -+output when this is the case. To build it as a release library instead, add -+`-DCMAKE_BUILD_TYPE=Release` when generating the build system files, as shown -+above. The use of `--config Release` in build commands is needed to properly -+support multi-configuration tools (like Visual Studio for example) and can be -+skipped for other build systems (like Makefile). -+ -+To enable link-time optimisation, also add `-DBENCHMARK_ENABLE_LTO=true` when -+generating the build system files. -+ -+If you are using gcc, you might need to set `GCC_AR` and `GCC_RANLIB` cmake -+cache variables, if autodetection fails. -+ -+If you are using clang, you may need to set `LLVMAR_EXECUTABLE`, -+`LLVMNM_EXECUTABLE` and `LLVMRANLIB_EXECUTABLE` cmake cache variables. -+ -+### Stable and Experimental Library Versions -+ -+The main branch contains the latest stable version of the benchmarking library; -+the API of which can be considered largely stable, with source breaking changes -+being made only upon the release of a new major version. -+ -+Newer, experimental, features are implemented and tested on the -+[`v2` branch](https://github.com/google/benchmark/tree/v2). Users who wish -+to use, test, and provide feedback on the new features are encouraged to try -+this branch. However, this branch provides no stability guarantees and reserves -+the right to change and break the API at any time. -+ -+## Usage -+ -+### Basic usage -+ -+Define a function that executes the code to measure, register it as a benchmark -+function using the `BENCHMARK` macro, and ensure an appropriate `main` function -+is available: -+ -+```c++ -+#include -+ -+static void BM_StringCreation(benchmark::State& state) { -+ for (auto _ : state) -+ std::string empty_string; -+} -+// Register the function as a benchmark -+BENCHMARK(BM_StringCreation); -+ -+// Define another benchmark -+static void BM_StringCopy(benchmark::State& state) { -+ std::string x = "hello"; -+ for (auto _ : state) -+ std::string copy(x); -+} -+BENCHMARK(BM_StringCopy); -+ -+BENCHMARK_MAIN(); -+``` -+ -+To run the benchmark, compile and link against the `benchmark` library -+(libbenchmark.a/.so). If you followed the build steps above, this library will -+be under the build directory you created. -+ -+```bash -+# Example on linux after running the build steps above. Assumes the -+# `benchmark` and `build` directories are under the current directory. -+$ g++ mybenchmark.cc -std=c++11 -isystem benchmark/include \ -+ -Lbenchmark/build/src -lbenchmark -lpthread -o mybenchmark -+``` -+ -+Alternatively, link against the `benchmark_main` library and remove -+`BENCHMARK_MAIN();` above to get the same behavior. -+ -+The compiled executable will run all benchmarks by default. Pass the `--help` -+flag for option information or see the [User Guide](docs/user_guide.md). -+ -+### Usage with CMake -+ -+If using CMake, it is recommended to link against the project-provided -+`benchmark::benchmark` and `benchmark::benchmark_main` targets using -+`target_link_libraries`. -+It is possible to use ```find_package``` to import an installed version of the -+library. -+```cmake -+find_package(benchmark REQUIRED) -+``` -+Alternatively, ```add_subdirectory``` will incorporate the library directly in -+to one's CMake project. -+```cmake -+add_subdirectory(benchmark) -+``` -+Either way, link to the library as follows. -+```cmake -+target_link_libraries(MyTarget benchmark::benchmark) -+``` -diff --git a/lib/benchmark/cmake/AddCXXCompilerFlag.cmake b/lib/benchmark/cmake/AddCXXCompilerFlag.cmake -index d0d2099814..858589e977 100644 ---- a/lib/benchmark/cmake/AddCXXCompilerFlag.cmake -+++ b/lib/benchmark/cmake/AddCXXCompilerFlag.cmake -@@ -34,9 +34,11 @@ function(add_cxx_compiler_flag FLAG) - check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG}) - set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}") - if(${MANGLED_FLAG}) -- set(VARIANT ${ARGV1}) -- if(ARGV1) -+ if(ARGC GREATER 1) -+ set(VARIANT ${ARGV1}) - string(TOUPPER "_${VARIANT}" VARIANT) -+ else() -+ set(VARIANT "") - endif() - set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${BENCHMARK_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE) - endif() -@@ -49,9 +51,11 @@ function(add_required_cxx_compiler_flag FLAG) - check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG}) - set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}") - if(${MANGLED_FLAG}) -- set(VARIANT ${ARGV1}) -- if(ARGV1) -+ if(ARGC GREATER 1) -+ set(VARIANT ${ARGV1}) - string(TOUPPER "_${VARIANT}" VARIANT) -+ else() -+ set(VARIANT "") - endif() - set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE) - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE) -diff --git a/lib/benchmark/cmake/CXXFeatureCheck.cmake b/lib/benchmark/cmake/CXXFeatureCheck.cmake -index 059d510dd9..62e6741fe3 100644 ---- a/lib/benchmark/cmake/CXXFeatureCheck.cmake -+++ b/lib/benchmark/cmake/CXXFeatureCheck.cmake -@@ -27,6 +27,11 @@ function(cxx_feature_check FILE) - return() - endif() - -+ if (ARGC GREATER 1) -+ message(STATUS "Enabling additional flags: ${ARGV1}") -+ list(APPEND BENCHMARK_CXX_LINKER_FLAGS ${ARGV1}) -+ endif() -+ - if (NOT DEFINED COMPILE_${FEATURE}) - message(STATUS "Performing Test ${FEATURE}") - if(CMAKE_CROSSCOMPILING) -diff --git a/lib/benchmark/cmake/GetGitVersion.cmake b/lib/benchmark/cmake/GetGitVersion.cmake -index 4f10f226d7..04a1f9b70d 100644 ---- a/lib/benchmark/cmake/GetGitVersion.cmake -+++ b/lib/benchmark/cmake/GetGitVersion.cmake -@@ -20,16 +20,20 @@ set(__get_git_version INCLUDED) - - function(get_git_version var) - if(GIT_EXECUTABLE) -- execute_process(COMMAND ${GIT_EXECUTABLE} describe --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 -+ execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 - WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} - RESULT_VARIABLE status -- OUTPUT_VARIABLE GIT_VERSION -+ OUTPUT_VARIABLE GIT_DESCRIBE_VERSION - ERROR_QUIET) -- if(${status}) -- set(GIT_VERSION "v0.0.0") -+ if(status) -+ set(GIT_DESCRIBE_VERSION "v0.0.0") -+ endif() -+ -+ string(STRIP ${GIT_DESCRIBE_VERSION} GIT_DESCRIBE_VERSION) -+ if(GIT_DESCRIBE_VERSION MATCHES v[^-]*-) -+ string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" GIT_VERSION ${GIT_DESCRIBE_VERSION}) - else() -- string(STRIP ${GIT_VERSION} GIT_VERSION) -- string(REGEX REPLACE "-[0-9]+-g" "-" GIT_VERSION ${GIT_VERSION}) -+ string(REGEX REPLACE "v(.*)" "\\1" GIT_VERSION ${GIT_DESCRIBE_VERSION}) - endif() - - # Work out if the repository is dirty -@@ -43,12 +47,12 @@ function(get_git_version var) - ERROR_QUIET) - string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY) - if (${GIT_DIRTY}) -- set(GIT_VERSION "${GIT_VERSION}-dirty") -+ set(GIT_DESCRIBE_VERSION "${GIT_DESCRIBE_VERSION}-dirty") - endif() -+ message(STATUS "git version: ${GIT_DESCRIBE_VERSION} normalized to ${GIT_VERSION}") - else() -- set(GIT_VERSION "v0.0.0") -+ set(GIT_VERSION "0.0.0") - endif() - -- message(STATUS "git Version: ${GIT_VERSION}") - set(${var} ${GIT_VERSION} PARENT_SCOPE) - endfunction() -diff --git a/lib/benchmark/cmake/GoogleTest.cmake b/lib/benchmark/cmake/GoogleTest.cmake -index dd611fc875..305eb8df7c 100644 ---- a/lib/benchmark/cmake/GoogleTest.cmake -+++ b/lib/benchmark/cmake/GoogleTest.cmake -@@ -29,6 +29,9 @@ set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) - - include(${GOOGLETEST_PREFIX}/googletest-paths.cmake) - -+# googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves. -+add_compile_options(-w) -+ - # Add googletest directly to our build. This defines - # the gtest and gtest_main targets. - add_subdirectory(${GOOGLETEST_SOURCE_DIR} -diff --git a/lib/benchmark/cmake/GoogleTest.cmake.in b/lib/benchmark/cmake/GoogleTest.cmake.in -index 28818ee293..fd957ff564 100644 ---- a/lib/benchmark/cmake/GoogleTest.cmake.in -+++ b/lib/benchmark/cmake/GoogleTest.cmake.in -@@ -31,7 +31,7 @@ if(EXISTS "${GOOGLETEST_PATH}" AND IS_DIRECTORY "${GOOGLETEST_PATH}" - ) - else() - if(NOT ALLOW_DOWNLOADING_GOOGLETEST) -- message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable ALLOW_DOWNLOADING_GOOGLETEST, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.") -+ message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.") - else() - message(WARNING "Did not find Google Test sources! Fetching from web...") - ExternalProject_Add( -diff --git a/lib/benchmark/cmake/Modules/FindPFM.cmake b/lib/benchmark/cmake/Modules/FindPFM.cmake -new file mode 100644 -index 0000000000..cf807a1ee9 ---- /dev/null -+++ b/lib/benchmark/cmake/Modules/FindPFM.cmake -@@ -0,0 +1,26 @@ -+# If successful, the following variables will be defined: -+# HAVE_LIBPFM. -+# Set BENCHMARK_ENABLE_LIBPFM to 0 to disable, regardless of libpfm presence. -+include(CheckIncludeFile) -+include(CheckLibraryExists) -+include(FeatureSummary) -+enable_language(C) -+ -+set_package_properties(PFM PROPERTIES -+ URL http://perfmon2.sourceforge.net/ -+ DESCRIPTION "a helper library to develop monitoring tools" -+ PURPOSE "Used to program specific performance monitoring events") -+ -+check_library_exists(libpfm.a pfm_initialize "" HAVE_LIBPFM_INITIALIZE) -+if(HAVE_LIBPFM_INITIALIZE) -+ check_include_file(perfmon/perf_event.h HAVE_PERFMON_PERF_EVENT_H) -+ check_include_file(perfmon/pfmlib.h HAVE_PERFMON_PFMLIB_H) -+ check_include_file(perfmon/pfmlib_perf_event.h HAVE_PERFMON_PFMLIB_PERF_EVENT_H) -+ if(HAVE_PERFMON_PERF_EVENT_H AND HAVE_PERFMON_PFMLIB_H AND HAVE_PERFMON_PFMLIB_PERF_EVENT_H) -+ message("Using Perf Counters.") -+ set(HAVE_LIBPFM 1) -+ set(PFM_FOUND 1) -+ endif() -+else() -+ message("Perf Counters support requested, but was unable to find libpfm.") -+endif() -diff --git a/lib/benchmark/cmake/benchmark.pc.in b/lib/benchmark/cmake/benchmark.pc.in -index 43ca8f91d7..34beb012ee 100644 ---- a/lib/benchmark/cmake/benchmark.pc.in -+++ b/lib/benchmark/cmake/benchmark.pc.in -@@ -1,7 +1,7 @@ - prefix=@CMAKE_INSTALL_PREFIX@ - exec_prefix=${prefix} --libdir=${prefix}/lib --includedir=${prefix}/include -+libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ -+includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ - - Name: @PROJECT_NAME@ - Description: Google microbenchmark framework -diff --git a/lib/benchmark/include/benchmark/benchmark.h b/lib/benchmark/include/benchmark/benchmark.h -index e5f6778958..fbb5340ce8 100644 ---- a/lib/benchmark/include/benchmark/benchmark.h -+++ b/lib/benchmark/include/benchmark/benchmark.h -@@ -42,6 +42,7 @@ BENCHMARK(BM_StringCopy); - int main(int argc, char** argv) { - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); -+ benchmark::Shutdown(); - return 0; - } - -@@ -139,13 +140,13 @@ thread exits the loop body. As such, any global setup or teardown you want to - do can be wrapped in a check against the thread index: - - static void BM_MultiThreaded(benchmark::State& state) { -- if (state.thread_index == 0) { -+ if (state.thread_index() == 0) { - // Setup code here. - } - for (auto _ : state) { - // Run the test as normal. - } -- if (state.thread_index == 0) { -+ if (state.thread_index() == 0) { - // Teardown code here. - } - } -@@ -167,6 +168,12 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); - #define BENCHMARK_HAS_CXX11 - #endif - -+// This _MSC_VER check should detect VS 2017 v15.3 and newer. -+#if __cplusplus >= 201703L || \ -+ (defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L) -+#define BENCHMARK_HAS_CXX17 -+#endif -+ - #include - - #include -@@ -176,9 +183,11 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); - #include - #include - #include -+#include - #include - - #if defined(BENCHMARK_HAS_CXX11) -+#include - #include - #include - #include -@@ -198,13 +207,19 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); - TypeName& operator=(const TypeName&) = delete - #endif - --#if defined(__GNUC__) -+#ifdef BENCHMARK_HAS_CXX17 -+#define BENCHMARK_UNUSED [[maybe_unused]] -+#elif defined(__GNUC__) || defined(__clang__) - #define BENCHMARK_UNUSED __attribute__((unused)) -+#else -+#define BENCHMARK_UNUSED -+#endif -+ -+#if defined(__GNUC__) || defined(__clang__) - #define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline)) - #define BENCHMARK_NOEXCEPT noexcept - #define BENCHMARK_NOEXCEPT_OP(x) noexcept(x) - #elif defined(_MSC_VER) && !defined(__clang__) --#define BENCHMARK_UNUSED - #define BENCHMARK_ALWAYS_INLINE __forceinline - #if _MSC_VER >= 1900 - #define BENCHMARK_NOEXCEPT noexcept -@@ -215,7 +230,6 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); - #endif - #define __func__ __FUNCTION__ - #else --#define BENCHMARK_UNUSED - #define BENCHMARK_ALWAYS_INLINE - #define BENCHMARK_NOEXCEPT - #define BENCHMARK_NOEXCEPT_OP(x) -@@ -251,11 +265,18 @@ BENCHMARK(BM_test)->Unit(benchmark::kMillisecond); - #define BENCHMARK_UNREACHABLE() ((void)0) - #endif - -+#ifdef BENCHMARK_HAS_CXX11 -+#define BENCHMARK_OVERRIDE override -+#else -+#define BENCHMARK_OVERRIDE -+#endif -+ - namespace benchmark { - class BenchmarkReporter; - class MemoryManager; - - void Initialize(int* argc, char** argv); -+void Shutdown(); - - // Report to stdout all arguments in 'argv' as unrecognized except the first. - // Returns true there is at least on unrecognized argument (i.e. 'argc' > 1). -@@ -282,6 +303,9 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, - // allocation measurements for benchmark runs. - void RegisterMemoryManager(MemoryManager* memory_manager); - -+// Add a key-value pair to output as part of the context stanza in the report. -+void AddCustomContext(const std::string& key, const std::string& value); -+ - namespace internal { - class Benchmark; - class BenchmarkImp; -@@ -304,6 +328,14 @@ BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams(); - #define BENCHMARK_HAS_NO_INLINE_ASSEMBLY - #endif - -+// Force the compiler to flush pending writes to global memory. Acts as an -+// effective read/write barrier -+#ifdef BENCHMARK_HAS_CXX11 -+inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { -+ std::atomic_signal_fence(std::memory_order_acq_rel); -+} -+#endif -+ - // The DoNotOptimize(...) function can be used to prevent a value or - // expression from being optimized away by the compiler. This function is - // intended to add little to no overhead. -@@ -323,11 +355,11 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) { - #endif - } - --// Force the compiler to flush pending writes to global memory. Acts as an --// effective read/write barrier -+#ifndef BENCHMARK_HAS_CXX11 - inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { - asm volatile("" : : : "memory"); - } -+#endif - #elif defined(_MSC_VER) - template - inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { -@@ -335,13 +367,15 @@ inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { - _ReadWriteBarrier(); - } - -+#ifndef BENCHMARK_HAS_CXX11 - inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); } -+#endif - #else - template - inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) { - internal::UseCharPointer(&reinterpret_cast(value)); - } --// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers -+// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11. - #endif - - // This class is used for user-defined counters. -@@ -406,7 +440,7 @@ typedef std::map UserCounters; - - // TimeUnit is passed to a benchmark in order to specify the order of magnitude - // for the measured time. --enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond }; -+enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond }; - - // BigO is passed to a benchmark in order to specify the asymptotic - // computational -@@ -416,6 +450,8 @@ enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda }; - - typedef uint64_t IterationCount; - -+enum StatisticUnit { kTime, kPercentage }; -+ - // BigOFunc is passed to a benchmark in order to specify the asymptotic - // computational complexity for the benchmark. - typedef double(BigOFunc)(IterationCount); -@@ -428,14 +464,17 @@ namespace internal { - struct Statistics { - std::string name_; - StatisticsFunc* compute_; -+ StatisticUnit unit_; - -- Statistics(const std::string& name, StatisticsFunc* compute) -- : name_(name), compute_(compute) {} -+ Statistics(const std::string& name, StatisticsFunc* compute, -+ StatisticUnit unit = kTime) -+ : name_(name), compute_(compute), unit_(unit) {} - }; - --struct BenchmarkInstance; -+class BenchmarkInstance; - class ThreadTimer; - class ThreadManager; -+class PerfCountersMeasurement; - - enum AggregationReportMode - #if defined(BENCHMARK_HAS_CXX11) -@@ -632,6 +671,14 @@ class State { - BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead") - int64_t range_y() const { return range(1); } - -+ // Number of threads concurrently executing the benchmark. -+ BENCHMARK_ALWAYS_INLINE -+ int threads() const { return threads_; } -+ -+ // Index of the executing thread. Values from [0, threads). -+ BENCHMARK_ALWAYS_INLINE -+ int thread_index() const { return thread_index_; } -+ - BENCHMARK_ALWAYS_INLINE - IterationCount iterations() const { - if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) { -@@ -640,8 +687,8 @@ class State { - return max_iterations - total_iterations_ + batch_leftover_; - } - -- private -- : // items we expect on the first cache line (ie 64 bytes of the struct) -+ private: -+ // items we expect on the first cache line (ie 64 bytes of the struct) - // When total_iterations_ is 0, KeepRunning() and friends will return false. - // May be larger than max_iterations. - IterationCount total_iterations_; -@@ -667,25 +714,27 @@ class State { - public: - // Container for user-defined counters. - UserCounters counters; -- // Index of the executing thread. Values from [0, threads). -- const int thread_index; -- // Number of threads concurrently executing the benchmark. -- const int threads; - - private: - State(IterationCount max_iters, const std::vector& ranges, - int thread_i, int n_threads, internal::ThreadTimer* timer, -- internal::ThreadManager* manager); -+ internal::ThreadManager* manager, -+ internal::PerfCountersMeasurement* perf_counters_measurement); - - void StartKeepRunning(); - // Implementation of KeepRunning() and KeepRunningBatch(). - // is_batch must be true unless n is 1. - bool KeepRunningInternal(IterationCount n, bool is_batch); - void FinishKeepRunning(); -- internal::ThreadTimer* timer_; -- internal::ThreadManager* manager_; - -- friend struct internal::BenchmarkInstance; -+ const int thread_index_; -+ const int threads_; -+ -+ internal::ThreadTimer* const timer_; -+ internal::ThreadManager* const manager_; -+ internal::PerfCountersMeasurement* const perf_counters_measurement_; -+ -+ friend class internal::BenchmarkInstance; - }; - - inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() { -@@ -789,6 +838,9 @@ class Benchmark { - // Note: the following methods all return "this" so that multiple - // method calls can be chained together in one expression. - -+ // Specify the name of the benchmark -+ Benchmark* Name(const std::string& name); -+ - // Run this benchmark once with "x" as the extra argument passed - // to the function. - // REQUIRES: The function passed to the constructor must accept an arg1. -@@ -827,6 +879,11 @@ class Benchmark { - // REQUIRES: The function passed to the constructor must accept arg1, arg2 ... - Benchmark* Ranges(const std::vector >& ranges); - -+ // Run this benchmark once for each combination of values in the (cartesian) -+ // product of the supplied argument lists. -+ // REQUIRES: The function passed to the constructor must accept arg1, arg2 ... -+ Benchmark* ArgsProduct(const std::vector >& arglists); -+ - // Equivalent to ArgNames({name}) - Benchmark* ArgName(const std::string& name); - -@@ -912,7 +969,8 @@ class Benchmark { - Benchmark* Complexity(BigOFunc* complexity); - - // Add this statistics to be computed over all the values of benchmark run -- Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics); -+ Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics, -+ StatisticUnit unit = kTime); - - // Support for running multiple copies of the same benchmark concurrently - // in multiple threads. This may be useful when measuring the scaling -@@ -955,6 +1013,7 @@ class Benchmark { - - private: - friend class BenchmarkFamilies; -+ friend class BenchmarkInstance; - - std::string name_; - AggregationReportMode aggregation_report_mode_; -@@ -1002,7 +1061,7 @@ class FunctionBenchmark : public Benchmark { - FunctionBenchmark(const char* name, Function* func) - : Benchmark(name), func_(func) {} - -- virtual void Run(State& st); -+ virtual void Run(State& st) BENCHMARK_OVERRIDE; - - private: - Function* func_; -@@ -1012,7 +1071,7 @@ class FunctionBenchmark : public Benchmark { - template - class LambdaBenchmark : public Benchmark { - public: -- virtual void Run(State& st) { lambda_(st); } -+ virtual void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); } - - private: - template -@@ -1064,7 +1123,7 @@ class Fixture : public internal::Benchmark { - public: - Fixture() : internal::Benchmark("") {} - -- virtual void Run(State& st) { -+ virtual void Run(State& st) BENCHMARK_OVERRIDE { - this->SetUp(st); - this->BenchmarkCase(st); - this->TearDown(st); -@@ -1097,9 +1156,12 @@ class Fixture : public internal::Benchmark { - - // Helpers for generating unique variable names - #define BENCHMARK_PRIVATE_NAME(n) \ -- BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n) -+ BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, n) - #define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c) - #define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c -+// Helper for concatenation with macro name expansion -+#define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \ -+ BaseClass##_##Method##_Benchmark - - #define BENCHMARK_PRIVATE_DECLARE(n) \ - static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \ -@@ -1169,37 +1231,37 @@ class Fixture : public internal::Benchmark { - #define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a) - #endif - --#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ -- class BaseClass##_##Method##_Benchmark : public BaseClass { \ -- public: \ -- BaseClass##_##Method##_Benchmark() : BaseClass() { \ -- this->SetName(#BaseClass "/" #Method); \ -- } \ -- \ -- protected: \ -- virtual void BenchmarkCase(::benchmark::State&); \ -+#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ -+ class BaseClass##_##Method##_Benchmark : public BaseClass { \ -+ public: \ -+ BaseClass##_##Method##_Benchmark() : BaseClass() { \ -+ this->SetName(#BaseClass "/" #Method); \ -+ } \ -+ \ -+ protected: \ -+ virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ - }; - --#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ -- class BaseClass##_##Method##_Benchmark : public BaseClass { \ -- public: \ -- BaseClass##_##Method##_Benchmark() : BaseClass() { \ -- this->SetName(#BaseClass "<" #a ">/" #Method); \ -- } \ -- \ -- protected: \ -- virtual void BenchmarkCase(::benchmark::State&); \ -+#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ -+ class BaseClass##_##Method##_Benchmark : public BaseClass { \ -+ public: \ -+ BaseClass##_##Method##_Benchmark() : BaseClass() { \ -+ this->SetName(#BaseClass "<" #a ">/" #Method); \ -+ } \ -+ \ -+ protected: \ -+ virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ - }; - --#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ -- class BaseClass##_##Method##_Benchmark : public BaseClass { \ -- public: \ -- BaseClass##_##Method##_Benchmark() : BaseClass() { \ -- this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \ -- } \ -- \ -- protected: \ -- virtual void BenchmarkCase(::benchmark::State&); \ -+#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ -+ class BaseClass##_##Method##_Benchmark : public BaseClass { \ -+ public: \ -+ BaseClass##_##Method##_Benchmark() : BaseClass() { \ -+ this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \ -+ } \ -+ \ -+ protected: \ -+ virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ - }; - - #ifdef BENCHMARK_HAS_CXX11 -@@ -1211,7 +1273,7 @@ class Fixture : public internal::Benchmark { - } \ - \ - protected: \ -- virtual void BenchmarkCase(::benchmark::State&); \ -+ virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \ - }; - #else - #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \ -@@ -1220,27 +1282,27 @@ class Fixture : public internal::Benchmark { - - #define BENCHMARK_DEFINE_F(BaseClass, Method) \ - BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ -- void BaseClass##_##Method##_Benchmark::BenchmarkCase -+ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase - - #define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) \ - BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ -- void BaseClass##_##Method##_Benchmark::BenchmarkCase -+ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase - - #define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b) \ - BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ -- void BaseClass##_##Method##_Benchmark::BenchmarkCase -+ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase - - #ifdef BENCHMARK_HAS_CXX11 - #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...) \ - BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \ -- void BaseClass##_##Method##_Benchmark::BenchmarkCase -+ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase - #else - #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) \ - BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) - #endif - - #define BENCHMARK_REGISTER_F(BaseClass, Method) \ -- BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark) -+ BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)) - - #define BENCHMARK_PRIVATE_REGISTER_F(TestName) \ - BENCHMARK_PRIVATE_DECLARE(TestName) = \ -@@ -1250,23 +1312,23 @@ class Fixture : public internal::Benchmark { - #define BENCHMARK_F(BaseClass, Method) \ - BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \ - BENCHMARK_REGISTER_F(BaseClass, Method); \ -- void BaseClass##_##Method##_Benchmark::BenchmarkCase -+ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase - - #define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a) \ - BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \ - BENCHMARK_REGISTER_F(BaseClass, Method); \ -- void BaseClass##_##Method##_Benchmark::BenchmarkCase -+ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase - - #define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b) \ - BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \ - BENCHMARK_REGISTER_F(BaseClass, Method); \ -- void BaseClass##_##Method##_Benchmark::BenchmarkCase -+ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase - - #ifdef BENCHMARK_HAS_CXX11 - #define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...) \ - BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \ - BENCHMARK_REGISTER_F(BaseClass, Method); \ -- void BaseClass##_##Method##_Benchmark::BenchmarkCase -+ void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase - #else - #define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) \ - BENCHMARK_TEMPLATE1_F(BaseClass, Method, a) -@@ -1278,6 +1340,8 @@ class Fixture : public internal::Benchmark { - ::benchmark::Initialize(&argc, argv); \ - if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \ - ::benchmark::RunSpecifiedBenchmarks(); \ -+ ::benchmark::Shutdown(); \ -+ return 0; \ - } \ - int main(int, char**) - -@@ -1294,10 +1358,16 @@ struct CPUInfo { - int num_sharing; - }; - -+ enum Scaling { -+ UNKNOWN, -+ ENABLED, -+ DISABLED -+ }; -+ - int num_cpus; -+ Scaling scaling; - double cycles_per_second; - std::vector caches; -- bool scaling_enabled; - std::vector load_avg; - - static const CPUInfo& Get(); -@@ -1356,6 +1426,7 @@ class BenchmarkReporter { - - Run() - : run_type(RT_Iteration), -+ aggregate_unit(kTime), - error_occurred(false), - iterations(1), - threads(1), -@@ -1375,8 +1446,11 @@ class BenchmarkReporter { - - std::string benchmark_name() const; - BenchmarkName run_name; -+ int64_t family_index; -+ int64_t per_family_instance_index; - RunType run_type; - std::string aggregate_name; -+ StatisticUnit aggregate_unit; - std::string report_label; // Empty if not set by benchmark. - bool error_occurred; - std::string error_message; -@@ -1424,6 +1498,19 @@ class BenchmarkReporter { - int64_t max_bytes_used; - }; - -+ struct PerFamilyRunReports { -+ PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {} -+ -+ // How many runs will all instances of this benchmark perform? -+ int num_runs_total; -+ -+ // How many runs have happened already? -+ int num_runs_done; -+ -+ // The reports about (non-errneous!) runs of this family. -+ std::vector Runs; -+ }; -+ - // Construct a BenchmarkReporter with the output stream set to 'std::cout' - // and the error stream set to 'std::cerr' - BenchmarkReporter(); -@@ -1496,8 +1583,8 @@ class ConsoleReporter : public BenchmarkReporter { - prev_counters_(), - printed_header_(false) {} - -- virtual bool ReportContext(const Context& context); -- virtual void ReportRuns(const std::vector& reports); -+ virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; -+ virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; - - protected: - virtual void PrintRunData(const Run& report); -@@ -1512,9 +1599,9 @@ class ConsoleReporter : public BenchmarkReporter { - class JSONReporter : public BenchmarkReporter { - public: - JSONReporter() : first_report_(true) {} -- virtual bool ReportContext(const Context& context); -- virtual void ReportRuns(const std::vector& reports); -- virtual void Finalize(); -+ virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; -+ virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; -+ virtual void Finalize() BENCHMARK_OVERRIDE; - - private: - void PrintRunData(const Run& report); -@@ -1527,8 +1614,8 @@ class BENCHMARK_DEPRECATED_MSG( - : public BenchmarkReporter { - public: - CSVReporter() : printed_header_(false) {} -- virtual bool ReportContext(const Context& context); -- virtual void ReportRuns(const std::vector& reports); -+ virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE; -+ virtual void ReportRuns(const std::vector& reports) BENCHMARK_OVERRIDE; - - private: - void PrintRunData(const Run& report); -@@ -1562,6 +1649,8 @@ class MemoryManager { - - inline const char* GetTimeUnitString(TimeUnit unit) { - switch (unit) { -+ case kSecond: -+ return "s"; - case kMillisecond: - return "ms"; - case kMicrosecond: -@@ -1574,6 +1663,8 @@ inline const char* GetTimeUnitString(TimeUnit unit) { - - inline double GetTimeUnitMultiplier(TimeUnit unit) { - switch (unit) { -+ case kSecond: -+ return 1; - case kMillisecond: - return 1e3; - case kMicrosecond: -@@ -1584,6 +1675,21 @@ inline double GetTimeUnitMultiplier(TimeUnit unit) { - BENCHMARK_UNREACHABLE(); - } - -+// Creates a list of integer values for the given range and multiplier. -+// This can be used together with ArgsProduct() to allow multiple ranges -+// with different multiplers. -+// Example: -+// ArgsProduct({ -+// CreateRange(0, 1024, /*multi=*/32), -+// CreateRange(0, 100, /*multi=*/4), -+// CreateDenseRange(0, 4, /*step=*/1), -+// }); -+std::vector CreateRange(int64_t lo, int64_t hi, int multi); -+ -+// Creates a list of integer values for the given range and step. -+std::vector CreateDenseRange(int64_t start, int64_t limit, -+ int step); -+ - } // namespace benchmark - - #endif // BENCHMARK_BENCHMARK_H_ -diff --git a/lib/benchmark/src/CMakeLists.txt b/lib/benchmark/src/CMakeLists.txt -index 28acc92dde..1df8a4aa8e 100644 ---- a/lib/benchmark/src/CMakeLists.txt -+++ b/lib/benchmark/src/CMakeLists.txt -@@ -1,7 +1,3 @@ --if(POLICY CMP0069) -- cmake_policy(SET CMP0069 NEW) --endif() -- - # Allow the source files to find headers in src/ - include(GNUInstallDirs) - include_directories(${PROJECT_SOURCE_DIR}/src) -@@ -32,6 +28,12 @@ target_include_directories(benchmark PUBLIC - $ - ) - -+# libpfm, if available -+if (HAVE_LIBPFM) -+ target_link_libraries(benchmark libpfm.a) -+ add_definitions(-DHAVE_LIBPFM) -+endif() -+ - # Link threads. - target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}) - find_library(LIBRT rt) -@@ -115,4 +117,8 @@ if (BENCHMARK_ENABLE_INSTALL) - EXPORT "${targets_export_name}" - NAMESPACE "${namespace}" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") -+ -+ install( -+ DIRECTORY "${PROJECT_SOURCE_DIR}/docs/" -+ DESTINATION "${CMAKE_INSTALL_PREFIX}/share/doc/${PROJECT_NAME}") - endif() -diff --git a/lib/benchmark/src/benchmark.cc b/lib/benchmark/src/benchmark.cc -index b751b9c31f..a086453a94 100644 ---- a/lib/benchmark/src/benchmark.cc -+++ b/lib/benchmark/src/benchmark.cc -@@ -13,6 +13,7 @@ - // limitations under the License. - - #include "benchmark/benchmark.h" -+ - #include "benchmark_api_internal.h" - #include "benchmark_runner.h" - #include "internal_macros.h" -@@ -32,7 +33,10 @@ - #include - #include - #include -+#include -+#include - #include -+#include - #include - #include - #include -@@ -45,71 +49,85 @@ - #include "internal_macros.h" - #include "log.h" - #include "mutex.h" -+#include "perf_counters.h" - #include "re.h" - #include "statistics.h" - #include "string_util.h" - #include "thread_manager.h" - #include "thread_timer.h" - -+namespace benchmark { - // Print a list of benchmarks. This option overrides all other options. --DEFINE_bool(benchmark_list_tests, false); -+BM_DEFINE_bool(benchmark_list_tests, false); - - // A regular expression that specifies the set of benchmarks to execute. If - // this flag is empty, or if this flag is the string \"all\", all benchmarks - // linked into the binary are run. --DEFINE_string(benchmark_filter, "."); -+BM_DEFINE_string(benchmark_filter, ""); - - // Minimum number of seconds we should run benchmark before results are - // considered significant. For cpu-time based tests, this is the lower bound - // on the total cpu time used by all threads that make up the test. For - // real-time based tests, this is the lower bound on the elapsed time of the - // benchmark execution, regardless of number of threads. --DEFINE_double(benchmark_min_time, 0.5); -+BM_DEFINE_double(benchmark_min_time, 0.5); - - // The number of runs of each benchmark. If greater than 1, the mean and - // standard deviation of the runs will be reported. --DEFINE_int32(benchmark_repetitions, 1); -+BM_DEFINE_int32(benchmark_repetitions, 1); -+ -+// If set, enable random interleaving of repetitions of all benchmarks. -+// See http://github.com/google/benchmark/issues/1051 for details. -+BM_DEFINE_bool(benchmark_enable_random_interleaving, false); - - // Report the result of each benchmark repetitions. When 'true' is specified - // only the mean, standard deviation, and other statistics are reported for - // repeated benchmarks. Affects all reporters. --DEFINE_bool(benchmark_report_aggregates_only, false); -+BM_DEFINE_bool(benchmark_report_aggregates_only, false); - - // Display the result of each benchmark repetitions. When 'true' is specified - // only the mean, standard deviation, and other statistics are displayed for - // repeated benchmarks. Unlike benchmark_report_aggregates_only, only affects - // the display reporter, but *NOT* file reporter, which will still contain - // all the output. --DEFINE_bool(benchmark_display_aggregates_only, false); -+BM_DEFINE_bool(benchmark_display_aggregates_only, false); - - // The format to use for console output. - // Valid values are 'console', 'json', or 'csv'. --DEFINE_string(benchmark_format, "console"); -+BM_DEFINE_string(benchmark_format, "console"); - - // The format to use for file output. - // Valid values are 'console', 'json', or 'csv'. --DEFINE_string(benchmark_out_format, "json"); -+BM_DEFINE_string(benchmark_out_format, "json"); - - // The file to write additional output to. --DEFINE_string(benchmark_out, ""); -+BM_DEFINE_string(benchmark_out, ""); - - // Whether to use colors in the output. Valid values: - // 'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use colors if - // the output is being sent to a terminal and the TERM environment variable is - // set to a terminal type that supports colors. --DEFINE_string(benchmark_color, "auto"); -+BM_DEFINE_string(benchmark_color, "auto"); - - // Whether to use tabular format when printing user counters to the console. - // Valid values: 'true'/'yes'/1, 'false'/'no'/0. Defaults to false. --DEFINE_bool(benchmark_counters_tabular, false); -+BM_DEFINE_bool(benchmark_counters_tabular, false); - --// The level of verbose logging to output --DEFINE_int32(v, 0); -+// List of additional perf counters to collect, in libpfm format. For more -+// information about libpfm: https://man7.org/linux/man-pages/man3/libpfm.3.html -+BM_DEFINE_string(benchmark_perf_counters, ""); - --namespace benchmark { -+// Extra context to include in the output formatted as comma-separated key-value -+// pairs. Kept internal as it's only used for parsing from env/command line. -+BM_DEFINE_kvpairs(benchmark_context, {}); -+ -+// The level of verbose logging to output -+BM_DEFINE_int32(v, 0); - - namespace internal { - -+std::map* global_context = nullptr; -+ - // FIXME: wouldn't LTO mess this up? - void UseCharPointer(char const volatile*) {} - -@@ -117,7 +135,8 @@ void UseCharPointer(char const volatile*) {} - - State::State(IterationCount max_iters, const std::vector& ranges, - int thread_i, int n_threads, internal::ThreadTimer* timer, -- internal::ThreadManager* manager) -+ internal::ThreadManager* manager, -+ internal::PerfCountersMeasurement* perf_counters_measurement) - : total_iterations_(0), - batch_leftover_(0), - max_iterations(max_iters), -@@ -127,12 +146,14 @@ State::State(IterationCount max_iters, const std::vector& ranges, - range_(ranges), - complexity_n_(0), - counters(), -- thread_index(thread_i), -- threads(n_threads), -+ thread_index_(thread_i), -+ threads_(n_threads), - timer_(timer), -- manager_(manager) { -- CHECK(max_iterations != 0) << "At least one iteration must be run"; -- CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; -+ manager_(manager), -+ perf_counters_measurement_(perf_counters_measurement) { -+ BM_CHECK(max_iterations != 0) << "At least one iteration must be run"; -+ BM_CHECK_LT(thread_index_, threads_) -+ << "thread_index must be less than threads"; - - // Note: The use of offsetof below is technically undefined until C++17 - // because State is not a standard layout type. However, all compilers -@@ -161,17 +182,29 @@ State::State(IterationCount max_iters, const std::vector& ranges, - - void State::PauseTiming() { - // Add in time accumulated so far -- CHECK(started_ && !finished_ && !error_occurred_); -+ BM_CHECK(started_ && !finished_ && !error_occurred_); - timer_->StopTimer(); -+ if (perf_counters_measurement_) { -+ auto measurements = perf_counters_measurement_->StopAndGetMeasurements(); -+ for (const auto& name_and_measurement : measurements) { -+ auto name = name_and_measurement.first; -+ auto measurement = name_and_measurement.second; -+ BM_CHECK_EQ(counters[name], 0.0); -+ counters[name] = Counter(measurement, Counter::kAvgIterations); -+ } -+ } - } - - void State::ResumeTiming() { -- CHECK(started_ && !finished_ && !error_occurred_); -+ BM_CHECK(started_ && !finished_ && !error_occurred_); - timer_->StartTimer(); -+ if (perf_counters_measurement_) { -+ perf_counters_measurement_->Start(); -+ } - } - - void State::SkipWithError(const char* msg) { -- CHECK(msg); -+ BM_CHECK(msg); - error_occurred_ = true; - { - MutexLock l(manager_->GetBenchmarkMutex()); -@@ -194,7 +227,7 @@ void State::SetLabel(const char* label) { - } - - void State::StartKeepRunning() { -- CHECK(!started_ && !finished_); -+ BM_CHECK(!started_ && !finished_); - started_ = true; - total_iterations_ = error_occurred_ ? 0 : max_iterations; - manager_->StartStopBarrier(); -@@ -202,7 +235,7 @@ void State::StartKeepRunning() { - } - - void State::FinishKeepRunning() { -- CHECK(started_ && (!finished_ || error_occurred_)); -+ BM_CHECK(started_ && (!finished_ || error_occurred_)); - if (!error_occurred_) { - PauseTiming(); - } -@@ -215,11 +248,42 @@ void State::FinishKeepRunning() { - namespace internal { - namespace { - -+// Flushes streams after invoking reporter methods that write to them. This -+// ensures users get timely updates even when streams are not line-buffered. -+void FlushStreams(BenchmarkReporter* reporter) { -+ if (!reporter) return; -+ std::flush(reporter->GetOutputStream()); -+ std::flush(reporter->GetErrorStream()); -+} -+ -+// Reports in both display and file reporters. -+void Report(BenchmarkReporter* display_reporter, -+ BenchmarkReporter* file_reporter, const RunResults& run_results) { -+ auto report_one = [](BenchmarkReporter* reporter, bool aggregates_only, -+ const RunResults& results) { -+ assert(reporter); -+ // If there are no aggregates, do output non-aggregates. -+ aggregates_only &= !results.aggregates_only.empty(); -+ if (!aggregates_only) reporter->ReportRuns(results.non_aggregates); -+ if (!results.aggregates_only.empty()) -+ reporter->ReportRuns(results.aggregates_only); -+ }; -+ -+ report_one(display_reporter, run_results.display_report_aggregates_only, -+ run_results); -+ if (file_reporter) -+ report_one(file_reporter, run_results.file_report_aggregates_only, -+ run_results); -+ -+ FlushStreams(display_reporter); -+ FlushStreams(file_reporter); -+} -+ - void RunBenchmarks(const std::vector& benchmarks, - BenchmarkReporter* display_reporter, - BenchmarkReporter* file_reporter) { - // Note the file_reporter can be null. -- CHECK(display_reporter != nullptr); -+ BM_CHECK(display_reporter != nullptr); - - // Determine the width of the name field using a minimum width of 10. - bool might_have_aggregates = FLAGS_benchmark_repetitions > 1; -@@ -227,10 +291,10 @@ void RunBenchmarks(const std::vector& benchmarks, - size_t stat_field_width = 0; - for (const BenchmarkInstance& benchmark : benchmarks) { - name_field_width = -- std::max(name_field_width, benchmark.name.str().size()); -- might_have_aggregates |= benchmark.repetitions > 1; -+ std::max(name_field_width, benchmark.name().str().size()); -+ might_have_aggregates |= benchmark.repetitions() > 1; - -- for (const auto& Stat : *benchmark.statistics) -+ for (const auto& Stat : benchmark.statistics()) - stat_field_width = std::max(stat_field_width, Stat.name_.size()); - } - if (might_have_aggregates) name_field_width += 1 + stat_field_width; -@@ -239,55 +303,84 @@ void RunBenchmarks(const std::vector& benchmarks, - BenchmarkReporter::Context context; - context.name_field_width = name_field_width; - -- // Keep track of running times of all instances of current benchmark -- std::vector complexity_reports; -- -- // We flush streams after invoking reporter methods that write to them. This -- // ensures users get timely updates even when streams are not line-buffered. -- auto flushStreams = [](BenchmarkReporter* reporter) { -- if (!reporter) return; -- std::flush(reporter->GetOutputStream()); -- std::flush(reporter->GetErrorStream()); -- }; -+ // Keep track of running times of all instances of each benchmark family. -+ std::map -+ per_family_reports; - - if (display_reporter->ReportContext(context) && - (!file_reporter || file_reporter->ReportContext(context))) { -- flushStreams(display_reporter); -- flushStreams(file_reporter); -- -- for (const auto& benchmark : benchmarks) { -- RunResults run_results = RunBenchmark(benchmark, &complexity_reports); -- -- auto report = [&run_results](BenchmarkReporter* reporter, -- bool report_aggregates_only) { -- assert(reporter); -- // If there are no aggregates, do output non-aggregates. -- report_aggregates_only &= !run_results.aggregates_only.empty(); -- if (!report_aggregates_only) -- reporter->ReportRuns(run_results.non_aggregates); -- if (!run_results.aggregates_only.empty()) -- reporter->ReportRuns(run_results.aggregates_only); -- }; -- -- report(display_reporter, run_results.display_report_aggregates_only); -- if (file_reporter) -- report(file_reporter, run_results.file_report_aggregates_only); -- -- flushStreams(display_reporter); -- flushStreams(file_reporter); -+ FlushStreams(display_reporter); -+ FlushStreams(file_reporter); -+ -+ size_t num_repetitions_total = 0; -+ -+ std::vector runners; -+ runners.reserve(benchmarks.size()); -+ for (const BenchmarkInstance& benchmark : benchmarks) { -+ BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr; -+ if (benchmark.complexity() != oNone) -+ reports_for_family = &per_family_reports[benchmark.family_index()]; -+ -+ runners.emplace_back(benchmark, reports_for_family); -+ int num_repeats_of_this_instance = runners.back().GetNumRepeats(); -+ num_repetitions_total += num_repeats_of_this_instance; -+ if (reports_for_family) -+ reports_for_family->num_runs_total += num_repeats_of_this_instance; -+ } -+ assert(runners.size() == benchmarks.size() && "Unexpected runner count."); -+ -+ std::vector repetition_indices; -+ repetition_indices.reserve(num_repetitions_total); -+ for (size_t runner_index = 0, num_runners = runners.size(); -+ runner_index != num_runners; ++runner_index) { -+ const internal::BenchmarkRunner& runner = runners[runner_index]; -+ std::fill_n(std::back_inserter(repetition_indices), -+ runner.GetNumRepeats(), runner_index); -+ } -+ assert(repetition_indices.size() == num_repetitions_total && -+ "Unexpected number of repetition indexes."); -+ -+ if (FLAGS_benchmark_enable_random_interleaving) { -+ std::random_device rd; -+ std::mt19937 g(rd()); -+ std::shuffle(repetition_indices.begin(), repetition_indices.end(), g); -+ } -+ -+ for (size_t repetition_index : repetition_indices) { -+ internal::BenchmarkRunner& runner = runners[repetition_index]; -+ runner.DoOneRepetition(); -+ if (runner.HasRepeatsRemaining()) continue; -+ // FIXME: report each repetition separately, not all of them in bulk. -+ -+ RunResults run_results = runner.GetResults(); -+ -+ // Maybe calculate complexity report -+ if (const auto* reports_for_family = runner.GetReportsForFamily()) { -+ if (reports_for_family->num_runs_done == -+ reports_for_family->num_runs_total) { -+ auto additional_run_stats = ComputeBigO(reports_for_family->Runs); -+ run_results.aggregates_only.insert(run_results.aggregates_only.end(), -+ additional_run_stats.begin(), -+ additional_run_stats.end()); -+ per_family_reports.erase( -+ (int)reports_for_family->Runs.front().family_index); -+ } -+ } -+ -+ Report(display_reporter, file_reporter, run_results); - } - } - display_reporter->Finalize(); - if (file_reporter) file_reporter->Finalize(); -- flushStreams(display_reporter); -- flushStreams(file_reporter); -+ FlushStreams(display_reporter); -+ FlushStreams(file_reporter); - } - - // Disable deprecated warnings temporarily because we need to reference --// CSVReporter but don't want to trigger -Werror=-Wdeprecated -+// CSVReporter but don't want to trigger -Werror=-Wdeprecated-declarations - #ifdef __GNUC__ - #pragma GCC diagnostic push --#pragma GCC diagnostic ignored "-Wdeprecated" -+#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - #endif - - std::unique_ptr CreateReporter( -@@ -377,7 +470,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, - if (!fname.empty()) { - output_file.open(fname); - if (!output_file.is_open()) { -- Err << "invalid file name: '" << fname << std::endl; -+ Err << "invalid file name: '" << fname << "'" << std::endl; - std::exit(1); - } - if (!file_reporter) { -@@ -399,7 +492,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, - - if (FLAGS_benchmark_list_tests) { - for (auto const& benchmark : benchmarks) -- Out << benchmark.name.str() << "\n"; -+ Out << benchmark.name().str() << "\n"; - } else { - internal::RunBenchmarks(benchmarks, display_reporter, file_reporter); - } -@@ -411,6 +504,16 @@ void RegisterMemoryManager(MemoryManager* manager) { - internal::memory_manager = manager; - } - -+void AddCustomContext(const std::string& key, const std::string& value) { -+ if (internal::global_context == nullptr) { -+ internal::global_context = new std::map(); -+ } -+ if (!internal::global_context->emplace(key, value).second) { -+ std::cerr << "Failed to add custom context \"" << key << "\" as it already " -+ << "exists with value \"" << value << "\"\n"; -+ } -+} -+ - namespace internal { - - void PrintUsageAndExit() { -@@ -420,6 +523,7 @@ void PrintUsageAndExit() { - " [--benchmark_filter=]\n" - " [--benchmark_min_time=]\n" - " [--benchmark_repetitions=]\n" -+ " [--benchmark_enable_random_interleaving={true|false}]\n" - " [--benchmark_report_aggregates_only={true|false}]\n" - " [--benchmark_display_aggregates_only={true|false}]\n" - " [--benchmark_format=]\n" -@@ -427,6 +531,8 @@ void PrintUsageAndExit() { - " [--benchmark_out_format=]\n" - " [--benchmark_color={auto|true|false}]\n" - " [--benchmark_counters_tabular={true|false}]\n" -+ " [--benchmark_perf_counters=,...]\n" -+ " [--benchmark_context==,...]\n" - " [--v=]\n"); - exit(0); - } -@@ -443,6 +549,8 @@ void ParseCommandLineFlags(int* argc, char** argv) { - &FLAGS_benchmark_min_time) || - ParseInt32Flag(argv[i], "benchmark_repetitions", - &FLAGS_benchmark_repetitions) || -+ ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving", -+ &FLAGS_benchmark_enable_random_interleaving) || - ParseBoolFlag(argv[i], "benchmark_report_aggregates_only", - &FLAGS_benchmark_report_aggregates_only) || - ParseBoolFlag(argv[i], "benchmark_display_aggregates_only", -@@ -457,6 +565,10 @@ void ParseCommandLineFlags(int* argc, char** argv) { - ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) || - ParseBoolFlag(argv[i], "benchmark_counters_tabular", - &FLAGS_benchmark_counters_tabular) || -+ ParseStringFlag(argv[i], "benchmark_perf_counters", -+ &FLAGS_benchmark_perf_counters) || -+ ParseKeyValueFlag(argv[i], "benchmark_context", -+ &FLAGS_benchmark_context) || - ParseInt32Flag(argv[i], "v", &FLAGS_v)) { - for (int j = i; j != *argc - 1; ++j) argv[j] = argv[j + 1]; - -@@ -467,13 +579,17 @@ void ParseCommandLineFlags(int* argc, char** argv) { - } - } - for (auto const* flag : -- {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) -+ {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format}) { - if (*flag != "console" && *flag != "json" && *flag != "csv") { - PrintUsageAndExit(); - } -+ } - if (FLAGS_benchmark_color.empty()) { - PrintUsageAndExit(); - } -+ for (const auto& kv : FLAGS_benchmark_context) { -+ AddCustomContext(kv.first, kv.second); -+ } - } - - int InitializeStreams() { -@@ -488,6 +604,10 @@ void Initialize(int* argc, char** argv) { - internal::LogLevel() = FLAGS_v; - } - -+void Shutdown() { -+ delete internal::global_context; -+} -+ - bool ReportUnrecognizedArguments(int argc, char** argv) { - for (int i = 1; i < argc; ++i) { - fprintf(stderr, "%s: error: unrecognized command-line flag: %s\n", argv[0], -diff --git a/lib/benchmark/src/benchmark_api_internal.cc b/lib/benchmark/src/benchmark_api_internal.cc -index 83c4eeb5c3..89da519afc 100644 ---- a/lib/benchmark/src/benchmark_api_internal.cc -+++ b/lib/benchmark/src/benchmark_api_internal.cc -@@ -1,15 +1,94 @@ - #include "benchmark_api_internal.h" - -+#include -+ -+#include "string_util.h" -+ - namespace benchmark { - namespace internal { - --State BenchmarkInstance::Run(IterationCount iters, int thread_id, -- internal::ThreadTimer* timer, -- internal::ThreadManager* manager) const { -- State st(iters, arg, thread_id, threads, timer, manager); -- benchmark->Run(st); -+BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx, -+ int per_family_instance_idx, -+ const std::vector& args, -+ int thread_count) -+ : benchmark_(*benchmark), -+ family_index_(family_idx), -+ per_family_instance_index_(per_family_instance_idx), -+ aggregation_report_mode_(benchmark_.aggregation_report_mode_), -+ args_(args), -+ time_unit_(benchmark_.time_unit_), -+ measure_process_cpu_time_(benchmark_.measure_process_cpu_time_), -+ use_real_time_(benchmark_.use_real_time_), -+ use_manual_time_(benchmark_.use_manual_time_), -+ complexity_(benchmark_.complexity_), -+ complexity_lambda_(benchmark_.complexity_lambda_), -+ statistics_(benchmark_.statistics_), -+ repetitions_(benchmark_.repetitions_), -+ min_time_(benchmark_.min_time_), -+ iterations_(benchmark_.iterations_), -+ threads_(thread_count) { -+ name_.function_name = benchmark_.name_; -+ -+ size_t arg_i = 0; -+ for (const auto& arg : args) { -+ if (!name_.args.empty()) { -+ name_.args += '/'; -+ } -+ -+ if (arg_i < benchmark->arg_names_.size()) { -+ const auto& arg_name = benchmark_.arg_names_[arg_i]; -+ if (!arg_name.empty()) { -+ name_.args += StrFormat("%s:", arg_name.c_str()); -+ } -+ } -+ -+ name_.args += StrFormat("%" PRId64, arg); -+ ++arg_i; -+ } -+ -+ if (!IsZero(benchmark->min_time_)) { -+ name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_); -+ } -+ -+ if (benchmark_.iterations_ != 0) { -+ name_.iterations = StrFormat( -+ "iterations:%lu", static_cast(benchmark_.iterations_)); -+ } -+ -+ if (benchmark_.repetitions_ != 0) { -+ name_.repetitions = StrFormat("repeats:%d", benchmark_.repetitions_); -+ } -+ -+ if (benchmark_.measure_process_cpu_time_) { -+ name_.time_type = "process_time"; -+ } -+ -+ if (benchmark_.use_manual_time_) { -+ if (!name_.time_type.empty()) { -+ name_.time_type += '/'; -+ } -+ name_.time_type += "manual_time"; -+ } else if (benchmark_.use_real_time_) { -+ if (!name_.time_type.empty()) { -+ name_.time_type += '/'; -+ } -+ name_.time_type += "real_time"; -+ } -+ -+ if (!benchmark_.thread_counts_.empty()) { -+ name_.threads = StrFormat("threads:%d", threads_); -+ } -+} -+ -+State BenchmarkInstance::Run( -+ IterationCount iters, int thread_id, internal::ThreadTimer* timer, -+ internal::ThreadManager* manager, -+ internal::PerfCountersMeasurement* perf_counters_measurement) const { -+ State st(iters, args_, thread_id, threads_, timer, manager, -+ perf_counters_measurement); -+ benchmark_.Run(st); - return st; - } - --} // namespace internal --} // namespace benchmark -+} // namespace internal -+} // namespace benchmark -diff --git a/lib/benchmark/src/benchmark_api_internal.h b/lib/benchmark/src/benchmark_api_internal.h -index 264eff95c5..592dd46303 100644 ---- a/lib/benchmark/src/benchmark_api_internal.h -+++ b/lib/benchmark/src/benchmark_api_internal.h -@@ -1,9 +1,6 @@ - #ifndef BENCHMARK_API_INTERNAL_H - #define BENCHMARK_API_INTERNAL_H - --#include "benchmark/benchmark.h" --#include "commandlineflags.h" -- - #include - #include - #include -@@ -11,32 +8,60 @@ - #include - #include - -+#include "benchmark/benchmark.h" -+#include "commandlineflags.h" -+ - namespace benchmark { - namespace internal { - - // Information kept per benchmark we may want to run --struct BenchmarkInstance { -- BenchmarkName name; -- Benchmark* benchmark; -- AggregationReportMode aggregation_report_mode; -- std::vector arg; -- TimeUnit time_unit; -- int range_multiplier; -- bool measure_process_cpu_time; -- bool use_real_time; -- bool use_manual_time; -- BigO complexity; -- BigOFunc* complexity_lambda; -- UserCounters counters; -- const std::vector* statistics; -- bool last_benchmark_instance; -- int repetitions; -- double min_time; -- IterationCount iterations; -- int threads; // Number of concurrent threads to us -+class BenchmarkInstance { -+ public: -+ BenchmarkInstance(Benchmark* benchmark, int family_index, -+ int per_family_instance_index, -+ const std::vector& args, int threads); -+ -+ const BenchmarkName& name() const { return name_; } -+ int family_index() const { return family_index_; } -+ int per_family_instance_index() const { return per_family_instance_index_; } -+ AggregationReportMode aggregation_report_mode() const { -+ return aggregation_report_mode_; -+ } -+ TimeUnit time_unit() const { return time_unit_; } -+ bool measure_process_cpu_time() const { return measure_process_cpu_time_; } -+ bool use_real_time() const { return use_real_time_; } -+ bool use_manual_time() const { return use_manual_time_; } -+ BigO complexity() const { return complexity_; } -+ BigOFunc* complexity_lambda() const { return complexity_lambda_; } -+ const std::vector& statistics() const { return statistics_; } -+ int repetitions() const { return repetitions_; } -+ double min_time() const { return min_time_; } -+ IterationCount iterations() const { return iterations_; } -+ int threads() const { return threads_; } - - State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer, -- internal::ThreadManager* manager) const; -+ internal::ThreadManager* manager, -+ internal::PerfCountersMeasurement* perf_counters_measurement) const; -+ -+ private: -+ BenchmarkName name_; -+ Benchmark& benchmark_; -+ const int family_index_; -+ const int per_family_instance_index_; -+ AggregationReportMode aggregation_report_mode_; -+ const std::vector& args_; -+ TimeUnit time_unit_; -+ bool measure_process_cpu_time_; -+ bool use_real_time_; -+ bool use_manual_time_; -+ BigO complexity_; -+ BigOFunc* complexity_lambda_; -+ UserCounters counters_; -+ const std::vector& statistics_; -+ int repetitions_; -+ double min_time_; -+ IterationCount iterations_; -+ int threads_; // Number of concurrent threads to us - }; - - bool FindBenchmarksInternal(const std::string& re, -diff --git a/lib/benchmark/src/benchmark_register.cc b/lib/benchmark/src/benchmark_register.cc -index cca39b2215..f2b32bdbf8 100644 ---- a/lib/benchmark/src/benchmark_register.cc -+++ b/lib/benchmark/src/benchmark_register.cc -@@ -24,6 +24,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -31,14 +32,10 @@ - #include - #include - #include -+#include - #include - #include - --#ifndef __STDC_FORMAT_MACROS --#define __STDC_FORMAT_MACROS --#endif --#include -- - #include "benchmark/benchmark.h" - #include "benchmark_api_internal.h" - #include "check.h" -@@ -114,7 +111,7 @@ void BenchmarkFamilies::ClearBenchmarks() { - bool BenchmarkFamilies::FindBenchmarks( - std::string spec, std::vector* benchmarks, - std::ostream* ErrStream) { -- CHECK(ErrStream); -+ BM_CHECK(ErrStream); - auto& Err = *ErrStream; - // Make regular expression out of command-line flag - std::string error_msg; -@@ -132,8 +129,13 @@ bool BenchmarkFamilies::FindBenchmarks( - // Special list of thread counts to use when none are specified - const std::vector one_thread = {1}; - -+ int next_family_index = 0; -+ - MutexLock l(mutex_); - for (std::unique_ptr& family : families_) { -+ int family_index = next_family_index; -+ int per_family_instance_index = 0; -+ - // Family was deleted or benchmark doesn't match - if (!family) continue; - -@@ -153,84 +155,24 @@ bool BenchmarkFamilies::FindBenchmarks( - } - // reserve in the special case the regex ".", since we know the final - // family size. -- if (spec == ".") benchmarks->reserve(family_size); -+ if (spec == ".") benchmarks->reserve(benchmarks->size() + family_size); - - for (auto const& args : family->args_) { - for (int num_threads : *thread_counts) { -- BenchmarkInstance instance; -- instance.name.function_name = family->name_; -- instance.benchmark = family.get(); -- instance.aggregation_report_mode = family->aggregation_report_mode_; -- instance.arg = args; -- instance.time_unit = family->time_unit_; -- instance.range_multiplier = family->range_multiplier_; -- instance.min_time = family->min_time_; -- instance.iterations = family->iterations_; -- instance.repetitions = family->repetitions_; -- instance.measure_process_cpu_time = family->measure_process_cpu_time_; -- instance.use_real_time = family->use_real_time_; -- instance.use_manual_time = family->use_manual_time_; -- instance.complexity = family->complexity_; -- instance.complexity_lambda = family->complexity_lambda_; -- instance.statistics = &family->statistics_; -- instance.threads = num_threads; -- -- // Add arguments to instance name -- size_t arg_i = 0; -- for (auto const& arg : args) { -- if (!instance.name.args.empty()) { -- instance.name.args += '/'; -- } -- -- if (arg_i < family->arg_names_.size()) { -- const auto& arg_name = family->arg_names_[arg_i]; -- if (!arg_name.empty()) { -- instance.name.args += StrFormat("%s:", arg_name.c_str()); -- } -- } -- -- instance.name.args += StrFormat("%" PRId64, arg); -- ++arg_i; -- } -- -- if (!IsZero(family->min_time_)) -- instance.name.min_time = -- StrFormat("min_time:%0.3f", family->min_time_); -- if (family->iterations_ != 0) { -- instance.name.iterations = -- StrFormat("iterations:%lu", -- static_cast(family->iterations_)); -- } -- if (family->repetitions_ != 0) -- instance.name.repetitions = -- StrFormat("repeats:%d", family->repetitions_); -- -- if (family->measure_process_cpu_time_) { -- instance.name.time_type = "process_time"; -- } -+ BenchmarkInstance instance(family.get(), family_index, -+ per_family_instance_index, args, -+ num_threads); - -- if (family->use_manual_time_) { -- if (!instance.name.time_type.empty()) { -- instance.name.time_type += '/'; -- } -- instance.name.time_type += "manual_time"; -- } else if (family->use_real_time_) { -- if (!instance.name.time_type.empty()) { -- instance.name.time_type += '/'; -- } -- instance.name.time_type += "real_time"; -- } -- -- // Add the number of threads used to the name -- if (!family->thread_counts_.empty()) { -- instance.name.threads = StrFormat("threads:%d", instance.threads); -- } -- -- const auto full_name = instance.name.str(); -+ const auto full_name = instance.name().str(); - if ((re.Match(full_name) && !isNegativeFilter) || - (!re.Match(full_name) && isNegativeFilter)) { -- instance.last_benchmark_instance = (&args == &family->args_.back()); - benchmarks->push_back(std::move(instance)); -+ -+ ++per_family_instance_index; -+ -+ // Only bump the next family index once we've estabilished that -+ // at least one instance of this family will be run. -+ if (next_family_index == family_index) ++next_family_index; - } - } - } -@@ -273,12 +215,18 @@ Benchmark::Benchmark(const char* name) - ComputeStatistics("mean", StatisticsMean); - ComputeStatistics("median", StatisticsMedian); - ComputeStatistics("stddev", StatisticsStdDev); -+ ComputeStatistics("cv", StatisticsCV, kPercentage); - } - - Benchmark::~Benchmark() {} - -+Benchmark* Benchmark::Name(const std::string& name) { -+ SetName(name.c_str()); -+ return this; -+} -+ - Benchmark* Benchmark::Arg(int64_t x) { -- CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); -+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); - args_.push_back({x}); - return this; - } -@@ -289,7 +237,7 @@ Benchmark* Benchmark::Unit(TimeUnit unit) { - } - - Benchmark* Benchmark::Range(int64_t start, int64_t limit) { -- CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); -+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); - std::vector arglist; - AddRange(&arglist, start, limit, range_multiplier_); - -@@ -301,53 +249,61 @@ Benchmark* Benchmark::Range(int64_t start, int64_t limit) { - - Benchmark* Benchmark::Ranges( - const std::vector>& ranges) { -- CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(ranges.size())); -+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(ranges.size())); - std::vector> arglists(ranges.size()); -- std::size_t total = 1; - for (std::size_t i = 0; i < ranges.size(); i++) { - AddRange(&arglists[i], ranges[i].first, ranges[i].second, - range_multiplier_); -- total *= arglists[i].size(); - } - -- std::vector ctr(arglists.size(), 0); -- -- for (std::size_t i = 0; i < total; i++) { -- std::vector tmp; -- tmp.reserve(arglists.size()); -+ ArgsProduct(arglists); - -- for (std::size_t j = 0; j < arglists.size(); j++) { -- tmp.push_back(arglists[j].at(ctr[j])); -- } -+ return this; -+} - -- args_.push_back(std::move(tmp)); -+Benchmark* Benchmark::ArgsProduct( -+ const std::vector>& arglists) { -+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(arglists.size())); - -- for (std::size_t j = 0; j < arglists.size(); j++) { -- if (ctr[j] + 1 < arglists[j].size()) { -- ++ctr[j]; -- break; -- } -- ctr[j] = 0; -+ std::vector indices(arglists.size()); -+ const std::size_t total = std::accumulate( -+ std::begin(arglists), std::end(arglists), std::size_t{1}, -+ [](const std::size_t res, const std::vector& arglist) { -+ return res * arglist.size(); -+ }); -+ std::vector args; -+ args.reserve(arglists.size()); -+ for (std::size_t i = 0; i < total; i++) { -+ for (std::size_t arg = 0; arg < arglists.size(); arg++) { -+ args.push_back(arglists[arg][indices[arg]]); - } -+ args_.push_back(args); -+ args.clear(); -+ -+ std::size_t arg = 0; -+ do { -+ indices[arg] = (indices[arg] + 1) % arglists[arg].size(); -+ } while (indices[arg++] == 0 && arg < arglists.size()); - } -+ - return this; - } - - Benchmark* Benchmark::ArgName(const std::string& name) { -- CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); -+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); - arg_names_ = {name}; - return this; - } - - Benchmark* Benchmark::ArgNames(const std::vector& names) { -- CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(names.size())); -+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(names.size())); - arg_names_ = names; - return this; - } - - Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) { -- CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); -- CHECK_LE(start, limit); -+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); -+ BM_CHECK_LE(start, limit); - for (int64_t arg = start; arg <= limit; arg += step) { - args_.push_back({arg}); - } -@@ -355,7 +311,7 @@ Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) { - } - - Benchmark* Benchmark::Args(const std::vector& args) { -- CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(args.size())); -+ BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast(args.size())); - args_.push_back(args); - return this; - } -@@ -366,27 +322,27 @@ Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { - } - - Benchmark* Benchmark::RangeMultiplier(int multiplier) { -- CHECK(multiplier > 1); -+ BM_CHECK(multiplier > 1); - range_multiplier_ = multiplier; - return this; - } - - Benchmark* Benchmark::MinTime(double t) { -- CHECK(t > 0.0); -- CHECK(iterations_ == 0); -+ BM_CHECK(t > 0.0); -+ BM_CHECK(iterations_ == 0); - min_time_ = t; - return this; - } - - Benchmark* Benchmark::Iterations(IterationCount n) { -- CHECK(n > 0); -- CHECK(IsZero(min_time_)); -+ BM_CHECK(n > 0); -+ BM_CHECK(IsZero(min_time_)); - iterations_ = n; - return this; - } - - Benchmark* Benchmark::Repetitions(int n) { -- CHECK(n > 0); -+ BM_CHECK(n > 0); - repetitions_ = n; - return this; - } -@@ -419,14 +375,14 @@ Benchmark* Benchmark::MeasureProcessCPUTime() { - } - - Benchmark* Benchmark::UseRealTime() { -- CHECK(!use_manual_time_) -+ BM_CHECK(!use_manual_time_) - << "Cannot set UseRealTime and UseManualTime simultaneously."; - use_real_time_ = true; - return this; - } - - Benchmark* Benchmark::UseManualTime() { -- CHECK(!use_real_time_) -+ BM_CHECK(!use_real_time_) - << "Cannot set UseRealTime and UseManualTime simultaneously."; - use_manual_time_ = true; - return this; -@@ -444,20 +400,21 @@ Benchmark* Benchmark::Complexity(BigOFunc* complexity) { - } - - Benchmark* Benchmark::ComputeStatistics(std::string name, -- StatisticsFunc* statistics) { -- statistics_.emplace_back(name, statistics); -+ StatisticsFunc* statistics, -+ StatisticUnit unit) { -+ statistics_.emplace_back(name, statistics, unit); - return this; - } - - Benchmark* Benchmark::Threads(int t) { -- CHECK_GT(t, 0); -+ BM_CHECK_GT(t, 0); - thread_counts_.push_back(t); - return this; - } - - Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { -- CHECK_GT(min_threads, 0); -- CHECK_GE(max_threads, min_threads); -+ BM_CHECK_GT(min_threads, 0); -+ BM_CHECK_GE(max_threads, min_threads); - - AddRange(&thread_counts_, min_threads, max_threads, 2); - return this; -@@ -465,9 +422,9 @@ Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { - - Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads, - int stride) { -- CHECK_GT(min_threads, 0); -- CHECK_GE(max_threads, min_threads); -- CHECK_GE(stride, 1); -+ BM_CHECK_GT(min_threads, 0); -+ BM_CHECK_GE(max_threads, min_threads); -+ BM_CHECK_GE(stride, 1); - - for (auto i = min_threads; i < max_threads; i += stride) { - thread_counts_.push_back(i); -@@ -503,4 +460,20 @@ void ClearRegisteredBenchmarks() { - internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks(); - } - -+std::vector CreateRange(int64_t lo, int64_t hi, int multi) { -+ std::vector args; -+ internal::AddRange(&args, lo, hi, multi); -+ return args; -+} -+ -+std::vector CreateDenseRange(int64_t start, int64_t limit, -+ int step) { -+ BM_CHECK_LE(start, limit); -+ std::vector args; -+ for (int64_t arg = start; arg <= limit; arg += step) { -+ args.push_back(arg); -+ } -+ return args; -+} -+ - } // end namespace benchmark -diff --git a/lib/benchmark/src/benchmark_register.h b/lib/benchmark/src/benchmark_register.h -index 61377d7423..7033dbf622 100644 ---- a/lib/benchmark/src/benchmark_register.h -+++ b/lib/benchmark/src/benchmark_register.h -@@ -1,6 +1,7 @@ - #ifndef BENCHMARK_REGISTER_H - #define BENCHMARK_REGISTER_H - -+#include - #include - - #include "check.h" -@@ -13,16 +14,16 @@ namespace internal { - template - typename std::vector::iterator - AddPowers(std::vector* dst, T lo, T hi, int mult) { -- CHECK_GE(lo, 0); -- CHECK_GE(hi, lo); -- CHECK_GE(mult, 2); -+ BM_CHECK_GE(lo, 0); -+ BM_CHECK_GE(hi, lo); -+ BM_CHECK_GE(mult, 2); - - const size_t start_offset = dst->size(); - - static const T kmax = std::numeric_limits::max(); - - // Space out the values in multiples of "mult" -- for (T i = 1; i <= hi; i *= mult) { -+ for (T i = static_cast(1); i <= hi; i *= mult) { - if (i >= lo) { - dst->push_back(i); - } -@@ -37,10 +38,10 @@ AddPowers(std::vector* dst, T lo, T hi, int mult) { - template - void AddNegatedPowers(std::vector* dst, T lo, T hi, int mult) { - // We negate lo and hi so we require that they cannot be equal to 'min'. -- CHECK_GT(lo, std::numeric_limits::min()); -- CHECK_GT(hi, std::numeric_limits::min()); -- CHECK_GE(hi, lo); -- CHECK_LE(hi, 0); -+ BM_CHECK_GT(lo, std::numeric_limits::min()); -+ BM_CHECK_GT(hi, std::numeric_limits::min()); -+ BM_CHECK_GE(hi, lo); -+ BM_CHECK_LE(hi, 0); - - // Add positive powers, then negate and reverse. - // Casts necessary since small integers get promoted -@@ -59,8 +60,8 @@ void AddRange(std::vector* dst, T lo, T hi, int mult) { - static_assert(std::is_integral::value && std::is_signed::value, - "Args type must be a signed integer"); - -- CHECK_GE(hi, lo); -- CHECK_GE(mult, 2); -+ BM_CHECK_GE(hi, lo); -+ BM_CHECK_GE(mult, 2); - - // Add "lo" - dst->push_back(lo); -@@ -86,7 +87,7 @@ void AddRange(std::vector* dst, T lo, T hi, int mult) { - } - - // Treat 0 as a special case (see discussion on #762). -- if (lo <= 0 && hi >= 0) { -+ if (lo < 0 && hi >= 0) { - dst->push_back(0); - } - -diff --git a/lib/benchmark/src/benchmark_runner.cc b/lib/benchmark/src/benchmark_runner.cc -index c414eff9a9..ead5c5a26f 100644 ---- a/lib/benchmark/src/benchmark_runner.cc -+++ b/lib/benchmark/src/benchmark_runner.cc -@@ -13,6 +13,7 @@ - // limitations under the License. - - #include "benchmark_runner.h" -+ - #include "benchmark/benchmark.h" - #include "benchmark_api_internal.h" - #include "internal_macros.h" -@@ -45,6 +46,7 @@ - #include "internal_macros.h" - #include "log.h" - #include "mutex.h" -+#include "perf_counters.h" - #include "re.h" - #include "statistics.h" - #include "string_util.h" -@@ -66,32 +68,34 @@ BenchmarkReporter::Run CreateRunReport( - const internal::ThreadManager::Result& results, - IterationCount memory_iterations, - const MemoryManager::Result& memory_result, double seconds, -- int64_t repetition_index) { -+ int64_t repetition_index, int64_t repeats) { - // Create report about this benchmark run. - BenchmarkReporter::Run report; - -- report.run_name = b.name; -+ report.run_name = b.name(); -+ report.family_index = b.family_index(); -+ report.per_family_instance_index = b.per_family_instance_index(); - report.error_occurred = results.has_error_; - report.error_message = results.error_message_; - report.report_label = results.report_label_; - // This is the total iterations across all threads. - report.iterations = results.iterations; -- report.time_unit = b.time_unit; -- report.threads = b.threads; -+ report.time_unit = b.time_unit(); -+ report.threads = b.threads(); - report.repetition_index = repetition_index; -- report.repetitions = b.repetitions; -+ report.repetitions = repeats; - - if (!report.error_occurred) { -- if (b.use_manual_time) { -+ if (b.use_manual_time()) { - report.real_accumulated_time = results.manual_time_used; - } else { - report.real_accumulated_time = results.real_time_used; - } - report.cpu_accumulated_time = results.cpu_time_used; - report.complexity_n = results.complexity_n; -- report.complexity = b.complexity; -- report.complexity_lambda = b.complexity_lambda; -- report.statistics = b.statistics; -+ report.complexity = b.complexity(); -+ report.complexity_lambda = b.complexity_lambda(); -+ report.statistics = &b.statistics(); - report.counters = results.counters; - - if (memory_iterations > 0) { -@@ -103,21 +107,24 @@ BenchmarkReporter::Run CreateRunReport( - report.max_bytes_used = memory_result.max_bytes_used; - } - -- internal::Finish(&report.counters, results.iterations, seconds, b.threads); -+ internal::Finish(&report.counters, results.iterations, seconds, -+ b.threads()); - } - return report; - } - - // Execute one thread of benchmark b for the specified number of iterations. --// Adds the stats collected for the thread into *total. -+// Adds the stats collected for the thread into manager->results. - void RunInThread(const BenchmarkInstance* b, IterationCount iters, -- int thread_id, ThreadManager* manager) { -+ int thread_id, ThreadManager* manager, -+ PerfCountersMeasurement* perf_counters_measurement) { - internal::ThreadTimer timer( -- b->measure_process_cpu_time -+ b->measure_process_cpu_time() - ? internal::ThreadTimer::CreateProcessCpuTime() - : internal::ThreadTimer::Create()); -- State st = b->Run(iters, thread_id, &timer, manager); -- CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) -+ State st = -+ b->Run(iters, thread_id, &timer, manager, perf_counters_measurement); -+ BM_CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) - << "Benchmark returned before State::KeepRunning() returned false!"; - { - MutexLock l(manager->GetBenchmarkMutex()); -@@ -132,228 +139,208 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, - manager->NotifyThreadComplete(); - } - --class BenchmarkRunner { -- public: -- BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, -- std::vector* complexity_reports_) -- : b(b_), -- complexity_reports(*complexity_reports_), -- min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time), -- repeats(b.repetitions != 0 ? b.repetitions -+} // end namespace -+ -+BenchmarkRunner::BenchmarkRunner( -+ const benchmark::internal::BenchmarkInstance& b_, -+ BenchmarkReporter::PerFamilyRunReports* reports_for_family_) -+ : b(b_), -+ reports_for_family(reports_for_family_), -+ min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time), -+ repeats(b.repetitions() != 0 ? b.repetitions() - : FLAGS_benchmark_repetitions), -- has_explicit_iteration_count(b.iterations != 0), -- pool(b.threads - 1), -- iters(has_explicit_iteration_count ? b.iterations : 1) { -+ has_explicit_iteration_count(b.iterations() != 0), -+ pool(b.threads() - 1), -+ iters(has_explicit_iteration_count ? b.iterations() : 1), -+ perf_counters_measurement( -+ PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))), -+ perf_counters_measurement_ptr(perf_counters_measurement.IsValid() -+ ? &perf_counters_measurement -+ : nullptr) { -+ run_results.display_report_aggregates_only = -+ (FLAGS_benchmark_report_aggregates_only || -+ FLAGS_benchmark_display_aggregates_only); -+ run_results.file_report_aggregates_only = -+ FLAGS_benchmark_report_aggregates_only; -+ if (b.aggregation_report_mode() != internal::ARM_Unspecified) { - run_results.display_report_aggregates_only = -- (FLAGS_benchmark_report_aggregates_only || -- FLAGS_benchmark_display_aggregates_only); -+ (b.aggregation_report_mode() & -+ internal::ARM_DisplayReportAggregatesOnly); - run_results.file_report_aggregates_only = -- FLAGS_benchmark_report_aggregates_only; -- if (b.aggregation_report_mode != internal::ARM_Unspecified) { -- run_results.display_report_aggregates_only = -- (b.aggregation_report_mode & -- internal::ARM_DisplayReportAggregatesOnly); -- run_results.file_report_aggregates_only = -- (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly); -- } -+ (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly); -+ BM_CHECK(FLAGS_benchmark_perf_counters.empty() || -+ perf_counters_measurement.IsValid()) -+ << "Perf counters were requested but could not be set up."; -+ } -+} - -- for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { -- DoOneRepetition(repetition_num); -- } -+BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() { -+ BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n"; - -- // Calculate additional statistics -- run_results.aggregates_only = ComputeStats(run_results.non_aggregates); -+ std::unique_ptr manager; -+ manager.reset(new internal::ThreadManager(b.threads())); - -- // Maybe calculate complexity report -- if ((b.complexity != oNone) && b.last_benchmark_instance) { -- auto additional_run_stats = ComputeBigO(complexity_reports); -- run_results.aggregates_only.insert(run_results.aggregates_only.end(), -- additional_run_stats.begin(), -- additional_run_stats.end()); -- complexity_reports.clear(); -- } -+ // Run all but one thread in separate threads -+ for (std::size_t ti = 0; ti < pool.size(); ++ti) { -+ pool[ti] = std::thread(&RunInThread, &b, iters, static_cast(ti + 1), -+ manager.get(), perf_counters_measurement_ptr); - } -+ // And run one thread here directly. -+ // (If we were asked to run just one thread, we don't create new threads.) -+ // Yes, we need to do this here *after* we start the separate threads. -+ RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr); - -- RunResults&& get_results() { return std::move(run_results); } -+ // The main thread has finished. Now let's wait for the other threads. -+ manager->WaitForAllThreads(); -+ for (std::thread& thread : pool) thread.join(); - -- private: -- RunResults run_results; -+ IterationResults i; -+ // Acquire the measurements/counters from the manager, UNDER THE LOCK! -+ { -+ MutexLock l(manager->GetBenchmarkMutex()); -+ i.results = manager->results; -+ } - -- const benchmark::internal::BenchmarkInstance& b; -- std::vector& complexity_reports; -+ // And get rid of the manager. -+ manager.reset(); - -- const double min_time; -- const int repeats; -- const bool has_explicit_iteration_count; -+ // Adjust real/manual time stats since they were reported per thread. -+ i.results.real_time_used /= b.threads(); -+ i.results.manual_time_used /= b.threads(); -+ // If we were measuring whole-process CPU usage, adjust the CPU time too. -+ if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads(); - -- std::vector pool; -+ BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" -+ << i.results.real_time_used << "\n"; - -- IterationCount iters; // preserved between repetitions! -- // So only the first repetition has to find/calculate it, -- // the other repetitions will just use that precomputed iteration count. -+ // By using KeepRunningBatch a benchmark can iterate more times than -+ // requested, so take the iteration count from i.results. -+ i.iters = i.results.iterations / b.threads(); - -- struct IterationResults { -- internal::ThreadManager::Result results; -- IterationCount iters; -- double seconds; -- }; -- IterationResults DoNIterations() { -- VLOG(2) << "Running " << b.name.str() << " for " << iters << "\n"; -+ // Base decisions off of real time if requested by this benchmark. -+ i.seconds = i.results.cpu_time_used; -+ if (b.use_manual_time()) { -+ i.seconds = i.results.manual_time_used; -+ } else if (b.use_real_time()) { -+ i.seconds = i.results.real_time_used; -+ } - -- std::unique_ptr manager; -- manager.reset(new internal::ThreadManager(b.threads)); -+ return i; -+} - -- // Run all but one thread in separate threads -- for (std::size_t ti = 0; ti < pool.size(); ++ti) { -- pool[ti] = std::thread(&RunInThread, &b, iters, static_cast(ti + 1), -- manager.get()); -- } -- // And run one thread here directly. -- // (If we were asked to run just one thread, we don't create new threads.) -- // Yes, we need to do this here *after* we start the separate threads. -- RunInThread(&b, iters, 0, manager.get()); -+IterationCount BenchmarkRunner::PredictNumItersNeeded( -+ const IterationResults& i) const { -+ // See how much iterations should be increased by. -+ // Note: Avoid division by zero with max(seconds, 1ns). -+ double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9); -+ // If our last run was at least 10% of FLAGS_benchmark_min_time then we -+ // use the multiplier directly. -+ // Otherwise we use at most 10 times expansion. -+ // NOTE: When the last run was at least 10% of the min time the max -+ // expansion should be 14x. -+ bool is_significant = (i.seconds / min_time) > 0.1; -+ multiplier = is_significant ? multiplier : 10.0; -+ -+ // So what seems to be the sufficiently-large iteration count? Round up. -+ const IterationCount max_next_iters = static_cast( -+ std::lround(std::max(multiplier * static_cast(i.iters), -+ static_cast(i.iters) + 1.0))); -+ // But we do have *some* sanity limits though.. -+ const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); -+ -+ BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; -+ return next_iters; // round up before conversion to integer. -+} - -- // The main thread has finished. Now let's wait for the other threads. -- manager->WaitForAllThreads(); -- for (std::thread& thread : pool) thread.join(); -+bool BenchmarkRunner::ShouldReportIterationResults( -+ const IterationResults& i) const { -+ // Determine if this run should be reported; -+ // Either it has run for a sufficient amount of time -+ // or because an error was reported. -+ return i.results.has_error_ || -+ i.iters >= kMaxIterations || // Too many iterations already. -+ i.seconds >= min_time || // The elapsed time is large enough. -+ // CPU time is specified but the elapsed real time greatly exceeds -+ // the minimum time. -+ // Note that user provided timers are except from this sanity check. -+ ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time()); -+} - -- IterationResults i; -- // Acquire the measurements/counters from the manager, UNDER THE LOCK! -- { -- MutexLock l(manager->GetBenchmarkMutex()); -- i.results = manager->results; -- } -+void BenchmarkRunner::DoOneRepetition() { -+ assert(HasRepeatsRemaining() && "Already done all repetitions?"); -+ -+ const bool is_the_first_repetition = num_repetitions_done == 0; -+ IterationResults i; -+ -+ // We *may* be gradually increasing the length (iteration count) -+ // of the benchmark until we decide the results are significant. -+ // And once we do, we report those last results and exit. -+ // Please do note that the if there are repetitions, the iteration count -+ // is *only* calculated for the *first* repetition, and other repetitions -+ // simply use that precomputed iteration count. -+ for (;;) { -+ i = DoNIterations(); -+ -+ // Do we consider the results to be significant? -+ // If we are doing repetitions, and the first repetition was already done, -+ // it has calculated the correct iteration time, so we have run that very -+ // iteration count just now. No need to calculate anything. Just report. -+ // Else, the normal rules apply. -+ const bool results_are_significant = !is_the_first_repetition || -+ has_explicit_iteration_count || -+ ShouldReportIterationResults(i); -+ -+ if (results_are_significant) break; // Good, let's report them! -+ -+ // Nope, bad iteration. Let's re-estimate the hopefully-sufficient -+ // iteration count, and run the benchmark again... -+ -+ iters = PredictNumItersNeeded(i); -+ assert(iters > i.iters && -+ "if we did more iterations than we want to do the next time, " -+ "then we should have accepted the current iteration run."); -+ } - -- // And get rid of the manager. -+ // Oh, one last thing, we need to also produce the 'memory measurements'.. -+ MemoryManager::Result memory_result; -+ IterationCount memory_iterations = 0; -+ if (memory_manager != nullptr) { -+ // Only run a few iterations to reduce the impact of one-time -+ // allocations in benchmarks that are not properly managed. -+ memory_iterations = std::min(16, iters); -+ memory_manager->Start(); -+ std::unique_ptr manager; -+ manager.reset(new internal::ThreadManager(1)); -+ RunInThread(&b, memory_iterations, 0, manager.get(), -+ perf_counters_measurement_ptr); -+ manager->WaitForAllThreads(); - manager.reset(); - -- // Adjust real/manual time stats since they were reported per thread. -- i.results.real_time_used /= b.threads; -- i.results.manual_time_used /= b.threads; -- // If we were measuring whole-process CPU usage, adjust the CPU time too. -- if (b.measure_process_cpu_time) i.results.cpu_time_used /= b.threads; -- -- VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" -- << i.results.real_time_used << "\n"; -- -- // So for how long were we running? -- i.iters = iters; -- // Base decisions off of real time if requested by this benchmark. -- i.seconds = i.results.cpu_time_used; -- if (b.use_manual_time) { -- i.seconds = i.results.manual_time_used; -- } else if (b.use_real_time) { -- i.seconds = i.results.real_time_used; -- } -- -- return i; -+ memory_manager->Stop(&memory_result); - } - -- IterationCount PredictNumItersNeeded(const IterationResults& i) const { -- // See how much iterations should be increased by. -- // Note: Avoid division by zero with max(seconds, 1ns). -- double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9); -- // If our last run was at least 10% of FLAGS_benchmark_min_time then we -- // use the multiplier directly. -- // Otherwise we use at most 10 times expansion. -- // NOTE: When the last run was at least 10% of the min time the max -- // expansion should be 14x. -- bool is_significant = (i.seconds / min_time) > 0.1; -- multiplier = is_significant ? multiplier : std::min(10.0, multiplier); -- if (multiplier <= 1.0) multiplier = 2.0; -- -- // So what seems to be the sufficiently-large iteration count? Round up. -- const IterationCount max_next_iters = -- std::lround(std::max(multiplier * i.iters, i.iters + 1.0)); -- // But we do have *some* sanity limits though.. -- const IterationCount next_iters = std::min(max_next_iters, kMaxIterations); -- -- VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; -- return next_iters; // round up before conversion to integer. -- } -+ // Ok, now actually report. -+ BenchmarkReporter::Run report = -+ CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds, -+ num_repetitions_done, repeats); - -- bool ShouldReportIterationResults(const IterationResults& i) const { -- // Determine if this run should be reported; -- // Either it has run for a sufficient amount of time -- // or because an error was reported. -- return i.results.has_error_ || -- i.iters >= kMaxIterations || // Too many iterations already. -- i.seconds >= min_time || // The elapsed time is large enough. -- // CPU time is specified but the elapsed real time greatly exceeds -- // the minimum time. -- // Note that user provided timers are except from this sanity check. -- ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time); -+ if (reports_for_family) { -+ ++reports_for_family->num_runs_done; -+ if (!report.error_occurred) reports_for_family->Runs.push_back(report); - } - -- void DoOneRepetition(int64_t repetition_index) { -- const bool is_the_first_repetition = repetition_index == 0; -- IterationResults i; -- -- // We *may* be gradually increasing the length (iteration count) -- // of the benchmark until we decide the results are significant. -- // And once we do, we report those last results and exit. -- // Please do note that the if there are repetitions, the iteration count -- // is *only* calculated for the *first* repetition, and other repetitions -- // simply use that precomputed iteration count. -- for (;;) { -- i = DoNIterations(); -- -- // Do we consider the results to be significant? -- // If we are doing repetitions, and the first repetition was already done, -- // it has calculated the correct iteration time, so we have run that very -- // iteration count just now. No need to calculate anything. Just report. -- // Else, the normal rules apply. -- const bool results_are_significant = !is_the_first_repetition || -- has_explicit_iteration_count || -- ShouldReportIterationResults(i); -- -- if (results_are_significant) break; // Good, let's report them! -- -- // Nope, bad iteration. Let's re-estimate the hopefully-sufficient -- // iteration count, and run the benchmark again... -- -- iters = PredictNumItersNeeded(i); -- assert(iters > i.iters && -- "if we did more iterations than we want to do the next time, " -- "then we should have accepted the current iteration run."); -- } -- -- // Oh, one last thing, we need to also produce the 'memory measurements'.. -- MemoryManager::Result memory_result; -- IterationCount memory_iterations = 0; -- if (memory_manager != nullptr) { -- // Only run a few iterations to reduce the impact of one-time -- // allocations in benchmarks that are not properly managed. -- memory_iterations = std::min(16, iters); -- memory_manager->Start(); -- std::unique_ptr manager; -- manager.reset(new internal::ThreadManager(1)); -- RunInThread(&b, memory_iterations, 0, manager.get()); -- manager->WaitForAllThreads(); -- manager.reset(); -- -- memory_manager->Stop(&memory_result); -- } -- -- // Ok, now actualy report. -- BenchmarkReporter::Run report = -- CreateRunReport(b, i.results, memory_iterations, memory_result, -- i.seconds, repetition_index); -+ run_results.non_aggregates.push_back(report); - -- if (!report.error_occurred && b.complexity != oNone) -- complexity_reports.push_back(report); -+ ++num_repetitions_done; -+} - -- run_results.non_aggregates.push_back(report); -- } --}; -+RunResults&& BenchmarkRunner::GetResults() { -+ assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?"); - --} // end namespace -+ // Calculate additional statistics over the repetitions of this instance. -+ run_results.aggregates_only = ComputeStats(run_results.non_aggregates); - --RunResults RunBenchmark( -- const benchmark::internal::BenchmarkInstance& b, -- std::vector* complexity_reports) { -- internal::BenchmarkRunner r(b, complexity_reports); -- return r.get_results(); -+ return std::move(run_results); - } - - } // end namespace internal -diff --git a/lib/benchmark/src/benchmark_runner.h b/lib/benchmark/src/benchmark_runner.h -index 96e8282a11..8427ce6a26 100644 ---- a/lib/benchmark/src/benchmark_runner.h -+++ b/lib/benchmark/src/benchmark_runner.h -@@ -15,19 +15,22 @@ - #ifndef BENCHMARK_RUNNER_H_ - #define BENCHMARK_RUNNER_H_ - -+#include -+#include -+ - #include "benchmark_api_internal.h" - #include "internal_macros.h" -- --DECLARE_double(benchmark_min_time); -- --DECLARE_int32(benchmark_repetitions); -- --DECLARE_bool(benchmark_report_aggregates_only); -- --DECLARE_bool(benchmark_display_aggregates_only); -+#include "perf_counters.h" -+#include "thread_manager.h" - - namespace benchmark { - -+BM_DECLARE_double(benchmark_min_time); -+BM_DECLARE_int32(benchmark_repetitions); -+BM_DECLARE_bool(benchmark_report_aggregates_only); -+BM_DECLARE_bool(benchmark_display_aggregates_only); -+BM_DECLARE_string(benchmark_perf_counters); -+ - namespace internal { - - extern MemoryManager* memory_manager; -@@ -40,9 +43,57 @@ struct RunResults { - bool file_report_aggregates_only = false; - }; - --RunResults RunBenchmark( -- const benchmark::internal::BenchmarkInstance& b, -- std::vector* complexity_reports); -+class BenchmarkRunner { -+ public: -+ BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, -+ BenchmarkReporter::PerFamilyRunReports* reports_for_family); -+ -+ int GetNumRepeats() const { return repeats; } -+ -+ bool HasRepeatsRemaining() const { -+ return GetNumRepeats() != num_repetitions_done; -+ } -+ -+ void DoOneRepetition(); -+ -+ RunResults&& GetResults(); -+ -+ BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const { -+ return reports_for_family; -+ } -+ -+ private: -+ RunResults run_results; -+ -+ const benchmark::internal::BenchmarkInstance& b; -+ BenchmarkReporter::PerFamilyRunReports* reports_for_family; -+ -+ const double min_time; -+ const int repeats; -+ const bool has_explicit_iteration_count; -+ -+ int num_repetitions_done = 0; -+ -+ std::vector pool; -+ -+ IterationCount iters; // preserved between repetitions! -+ // So only the first repetition has to find/calculate it, -+ // the other repetitions will just use that precomputed iteration count. -+ -+ PerfCountersMeasurement perf_counters_measurement; -+ PerfCountersMeasurement* const perf_counters_measurement_ptr; -+ -+ struct IterationResults { -+ internal::ThreadManager::Result results; -+ IterationCount iters; -+ double seconds; -+ }; -+ IterationResults DoNIterations(); -+ -+ IterationCount PredictNumItersNeeded(const IterationResults& i) const; -+ -+ bool ShouldReportIterationResults(const IterationResults& i) const; -+}; - - } // namespace internal - -diff --git a/lib/benchmark/src/check.h b/lib/benchmark/src/check.h -index f5f8253f80..0efd13ff4d 100644 ---- a/lib/benchmark/src/check.h -+++ b/lib/benchmark/src/check.h -@@ -23,8 +23,9 @@ BENCHMARK_NORETURN inline void CallAbortHandler() { - std::abort(); // fallback to enforce noreturn - } - --// CheckHandler is the class constructed by failing CHECK macros. CheckHandler --// will log information about the failures and abort when it is destructed. -+// CheckHandler is the class constructed by failing BM_CHECK macros. -+// CheckHandler will log information about the failures and abort when it is -+// destructed. - class CheckHandler { - public: - CheckHandler(const char* check, const char* file, const char* func, int line) -@@ -51,32 +52,32 @@ class CheckHandler { - } // end namespace internal - } // end namespace benchmark - --// The CHECK macro returns a std::ostream object that can have extra information --// written to it. -+// The BM_CHECK macro returns a std::ostream object that can have extra -+// information written to it. - #ifndef NDEBUG --#define CHECK(b) \ -+#define BM_CHECK(b) \ - (b ? ::benchmark::internal::GetNullLogInstance() \ - : ::benchmark::internal::CheckHandler(#b, __FILE__, __func__, __LINE__) \ - .GetLog()) - #else --#define CHECK(b) ::benchmark::internal::GetNullLogInstance() -+#define BM_CHECK(b) ::benchmark::internal::GetNullLogInstance() - #endif - - // clang-format off - // preserve whitespacing between operators for alignment --#define CHECK_EQ(a, b) CHECK((a) == (b)) --#define CHECK_NE(a, b) CHECK((a) != (b)) --#define CHECK_GE(a, b) CHECK((a) >= (b)) --#define CHECK_LE(a, b) CHECK((a) <= (b)) --#define CHECK_GT(a, b) CHECK((a) > (b)) --#define CHECK_LT(a, b) CHECK((a) < (b)) -- --#define CHECK_FLOAT_EQ(a, b, eps) CHECK(std::fabs((a) - (b)) < (eps)) --#define CHECK_FLOAT_NE(a, b, eps) CHECK(std::fabs((a) - (b)) >= (eps)) --#define CHECK_FLOAT_GE(a, b, eps) CHECK((a) - (b) > -(eps)) --#define CHECK_FLOAT_LE(a, b, eps) CHECK((b) - (a) > -(eps)) --#define CHECK_FLOAT_GT(a, b, eps) CHECK((a) - (b) > (eps)) --#define CHECK_FLOAT_LT(a, b, eps) CHECK((b) - (a) > (eps)) -+#define BM_CHECK_EQ(a, b) BM_CHECK((a) == (b)) -+#define BM_CHECK_NE(a, b) BM_CHECK((a) != (b)) -+#define BM_CHECK_GE(a, b) BM_CHECK((a) >= (b)) -+#define BM_CHECK_LE(a, b) BM_CHECK((a) <= (b)) -+#define BM_CHECK_GT(a, b) BM_CHECK((a) > (b)) -+#define BM_CHECK_LT(a, b) BM_CHECK((a) < (b)) -+ -+#define BM_CHECK_FLOAT_EQ(a, b, eps) BM_CHECK(std::fabs((a) - (b)) < (eps)) -+#define BM_CHECK_FLOAT_NE(a, b, eps) BM_CHECK(std::fabs((a) - (b)) >= (eps)) -+#define BM_CHECK_FLOAT_GE(a, b, eps) BM_CHECK((a) - (b) > -(eps)) -+#define BM_CHECK_FLOAT_LE(a, b, eps) BM_CHECK((b) - (a) > -(eps)) -+#define BM_CHECK_FLOAT_GT(a, b, eps) BM_CHECK((a) - (b) > (eps)) -+#define BM_CHECK_FLOAT_LT(a, b, eps) BM_CHECK((b) - (a) > (eps)) - //clang-format on - - #endif // CHECK_H_ -diff --git a/lib/benchmark/src/colorprint.cc b/lib/benchmark/src/colorprint.cc -index fff6a98818..afaa55dd54 100644 ---- a/lib/benchmark/src/colorprint.cc -+++ b/lib/benchmark/src/colorprint.cc -@@ -94,7 +94,7 @@ std::string FormatString(const char* msg, va_list args) { - va_end(args_cp); - - // currently there is no error handling for failure, so this is hack. -- CHECK(ret >= 0); -+ BM_CHECK(ret >= 0); - - if (ret == 0) // handle empty expansion - return {}; -@@ -105,7 +105,7 @@ std::string FormatString(const char* msg, va_list args) { - size = (size_t)ret + 1; // + 1 for the null byte - std::unique_ptr buff(new char[size]); - ret = vsnprintf(buff.get(), size, msg, args); -- CHECK(ret > 0 && ((size_t)ret) < size); -+ BM_CHECK(ret > 0 && ((size_t)ret) < size); - return buff.get(); - } - } -diff --git a/lib/benchmark/src/commandlineflags.cc b/lib/benchmark/src/commandlineflags.cc -index 3380a127a8..5724aaa294 100644 ---- a/lib/benchmark/src/commandlineflags.cc -+++ b/lib/benchmark/src/commandlineflags.cc -@@ -20,6 +20,10 @@ - #include - #include - #include -+#include -+#include -+ -+#include "../src/string_util.h" - - namespace benchmark { - namespace { -@@ -78,6 +82,30 @@ bool ParseDouble(const std::string& src_text, const char* str, double* value) { - return true; - } - -+// Parses 'str' into KV pairs. If successful, writes the result to *value and -+// returns true; otherwise leaves *value unchanged and returns false. -+bool ParseKvPairs(const std::string& src_text, const char* str, -+ std::map* value) { -+ std::map kvs; -+ for (const auto& kvpair : StrSplit(str, ',')) { -+ const auto kv = StrSplit(kvpair, '='); -+ if (kv.size() != 2) { -+ std::cerr << src_text << " is expected to be a comma-separated list of " -+ << "= strings, but actually has value \"" << str -+ << "\".\n"; -+ return false; -+ } -+ if (!kvs.emplace(kv[0], kv[1]).second) { -+ std::cerr << src_text << " is expected to contain unique keys but key \"" -+ << kv[0] << "\" was repeated.\n"; -+ return false; -+ } -+ } -+ -+ *value = kvs; -+ return true; -+} -+ - // Returns the name of the environment variable corresponding to the - // given flag. For example, FlagToEnvVar("foo") will return - // "BENCHMARK_FOO" in the open-source version. -@@ -88,7 +116,7 @@ static std::string FlagToEnvVar(const char* flag) { - for (size_t i = 0; i != flag_str.length(); ++i) - env_var += static_cast(::toupper(flag_str.c_str()[i])); - -- return "BENCHMARK_" + env_var; -+ return env_var; - } - - } // namespace -@@ -129,6 +157,20 @@ const char* StringFromEnv(const char* flag, const char* default_val) { - return value == nullptr ? default_val : value; - } - -+std::map KvPairsFromEnv( -+ const char* flag, std::map default_val) { -+ const std::string env_var = FlagToEnvVar(flag); -+ const char* const value_str = getenv(env_var.c_str()); -+ -+ if (value_str == nullptr) return default_val; -+ -+ std::map value; -+ if (!ParseKvPairs("Environment variable " + env_var, value_str, &value)) { -+ return default_val; -+ } -+ return value; -+} -+ - // Parses a string as a command line flag. The string should have - // the format "--flag=value". When def_optional is true, the "=value" - // part can be omitted. -@@ -206,6 +248,22 @@ bool ParseStringFlag(const char* str, const char* flag, std::string* value) { - return true; - } - -+bool ParseKeyValueFlag( -+ const char* str, const char* flag, -+ std::map* value) { -+ const char* const value_str = ParseFlagValue(str, flag, false); -+ -+ if (value_str == nullptr) return false; -+ -+ for (const auto& kvpair : StrSplit(value_str, ',')) { -+ const auto kv = StrSplit(kvpair, '='); -+ if (kv.size() != 2) return false; -+ value->emplace(kv[0], kv[1]); -+ } -+ -+ return true; -+} -+ - bool IsFlag(const char* str, const char* flag) { - return (ParseFlagValue(str, flag, true) != nullptr); - } -diff --git a/lib/benchmark/src/commandlineflags.h b/lib/benchmark/src/commandlineflags.h -index 3a1f6a8dbc..5baaf11784 100644 ---- a/lib/benchmark/src/commandlineflags.h -+++ b/lib/benchmark/src/commandlineflags.h -@@ -2,61 +2,70 @@ - #define BENCHMARK_COMMANDLINEFLAGS_H_ - - #include -+#include - #include - - // Macro for referencing flags. - #define FLAG(name) FLAGS_##name - - // Macros for declaring flags. --#define DECLARE_bool(name) extern bool FLAG(name) --#define DECLARE_int32(name) extern int32_t FLAG(name) --#define DECLARE_double(name) extern double FLAG(name) --#define DECLARE_string(name) extern std::string FLAG(name) -+#define BM_DECLARE_bool(name) extern bool FLAG(name) -+#define BM_DECLARE_int32(name) extern int32_t FLAG(name) -+#define BM_DECLARE_double(name) extern double FLAG(name) -+#define BM_DECLARE_string(name) extern std::string FLAG(name) -+#define BM_DECLARE_kvpairs(name) \ -+ extern std::map FLAG(name) - - // Macros for defining flags. --#define DEFINE_bool(name, default_val) \ -- bool FLAG(name) = \ -- benchmark::BoolFromEnv(#name, default_val) --#define DEFINE_int32(name, default_val) \ -- int32_t FLAG(name) = \ -- benchmark::Int32FromEnv(#name, default_val) --#define DEFINE_double(name, default_val) \ -- double FLAG(name) = \ -- benchmark::DoubleFromEnv(#name, default_val) --#define DEFINE_string(name, default_val) \ -- std::string FLAG(name) = \ -- benchmark::StringFromEnv(#name, default_val) -+#define BM_DEFINE_bool(name, default_val) \ -+ bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val) -+#define BM_DEFINE_int32(name, default_val) \ -+ int32_t FLAG(name) = benchmark::Int32FromEnv(#name, default_val) -+#define BM_DEFINE_double(name, default_val) \ -+ double FLAG(name) = benchmark::DoubleFromEnv(#name, default_val) -+#define BM_DEFINE_string(name, default_val) \ -+ std::string FLAG(name) = benchmark::StringFromEnv(#name, default_val) -+#define BM_DEFINE_kvpairs(name, default_val) \ -+ std::map FLAG(name) = \ -+ benchmark::KvPairsFromEnv(#name, default_val) - - namespace benchmark { - --// Parses a bool from the environment variable --// corresponding to the given flag. -+// Parses a bool from the environment variable corresponding to the given flag. - // - // If the variable exists, returns IsTruthyFlagValue() value; if not, - // returns the given default value. - bool BoolFromEnv(const char* flag, bool default_val); - --// Parses an Int32 from the environment variable --// corresponding to the given flag. -+// Parses an Int32 from the environment variable corresponding to the given -+// flag. - // - // If the variable exists, returns ParseInt32() value; if not, returns - // the given default value. - int32_t Int32FromEnv(const char* flag, int32_t default_val); - --// Parses an Double from the environment variable --// corresponding to the given flag. -+// Parses an Double from the environment variable corresponding to the given -+// flag. - // - // If the variable exists, returns ParseDouble(); if not, returns - // the given default value. - double DoubleFromEnv(const char* flag, double default_val); - --// Parses a string from the environment variable --// corresponding to the given flag. -+// Parses a string from the environment variable corresponding to the given -+// flag. - // - // If variable exists, returns its value; if not, returns - // the given default value. - const char* StringFromEnv(const char* flag, const char* default_val); - -+// Parses a set of kvpairs from the environment variable corresponding to the -+// given flag. -+// -+// If variable exists, returns its value; if not, returns -+// the given default value. -+std::map KvPairsFromEnv( -+ const char* flag, std::map default_val); -+ - // Parses a string for a bool flag, in the form of either - // "--flag=value" or "--flag". - // -@@ -68,27 +77,31 @@ const char* StringFromEnv(const char* flag, const char* default_val); - // true. On failure, returns false without changing *value. - bool ParseBoolFlag(const char* str, const char* flag, bool* value); - --// Parses a string for an Int32 flag, in the form of --// "--flag=value". -+// Parses a string for an Int32 flag, in the form of "--flag=value". - // - // On success, stores the value of the flag in *value, and returns - // true. On failure, returns false without changing *value. - bool ParseInt32Flag(const char* str, const char* flag, int32_t* value); - --// Parses a string for a Double flag, in the form of --// "--flag=value". -+// Parses a string for a Double flag, in the form of "--flag=value". - // - // On success, stores the value of the flag in *value, and returns - // true. On failure, returns false without changing *value. - bool ParseDoubleFlag(const char* str, const char* flag, double* value); - --// Parses a string for a string flag, in the form of --// "--flag=value". -+// Parses a string for a string flag, in the form of "--flag=value". - // - // On success, stores the value of the flag in *value, and returns - // true. On failure, returns false without changing *value. - bool ParseStringFlag(const char* str, const char* flag, std::string* value); - -+// Parses a string for a kvpairs flag in the form "--flag=key=value,key=value" -+// -+// On success, stores the value of the flag in *value and returns true. On -+// failure returns false, though *value may have been mutated. -+bool ParseKeyValueFlag(const char* str, const char* flag, -+ std::map* value); -+ - // Returns true if the string matches the flag. - bool IsFlag(const char* str, const char* flag); - -diff --git a/lib/benchmark/src/complexity.cc b/lib/benchmark/src/complexity.cc -index aeed67f0c7..b87697fb94 100644 ---- a/lib/benchmark/src/complexity.cc -+++ b/lib/benchmark/src/complexity.cc -@@ -82,7 +82,6 @@ std::string GetBigOString(BigO complexity) { - LeastSq MinimalLeastSq(const std::vector& n, - const std::vector& time, - BigOFunc* fitting_curve) { -- double sigma_gn = 0.0; - double sigma_gn_squared = 0.0; - double sigma_time = 0.0; - double sigma_time_gn = 0.0; -@@ -90,7 +89,6 @@ LeastSq MinimalLeastSq(const std::vector& n, - // Calculate least square fitting parameter - for (size_t i = 0; i < n.size(); ++i) { - double gn_i = fitting_curve(n[i]); -- sigma_gn += gn_i; - sigma_gn_squared += gn_i * gn_i; - sigma_time += time[i]; - sigma_time_gn += time[i] * gn_i; -@@ -125,10 +123,10 @@ LeastSq MinimalLeastSq(const std::vector& n, - // fitting curve. - LeastSq MinimalLeastSq(const std::vector& n, - const std::vector& time, const BigO complexity) { -- CHECK_EQ(n.size(), time.size()); -- CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two -- // benchmark runs are given -- CHECK_NE(complexity, oNone); -+ BM_CHECK_EQ(n.size(), time.size()); -+ BM_CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two -+ // benchmark runs are given -+ BM_CHECK_NE(complexity, oNone); - - LeastSq best_fit; - -@@ -169,7 +167,8 @@ std::vector ComputeBigO( - - // Populate the accumulators. - for (const Run& run : reports) { -- CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?"; -+ BM_CHECK_GT(run.complexity_n, 0) -+ << "Did you forget to call SetComplexityN?"; - n.push_back(run.complexity_n); - real_time.push_back(run.real_accumulated_time / run.iterations); - cpu_time.push_back(run.cpu_accumulated_time / run.iterations); -@@ -193,11 +192,14 @@ std::vector ComputeBigO( - // Get the data from the accumulator to BenchmarkReporter::Run's. - Run big_o; - big_o.run_name = run_name; -+ big_o.family_index = reports[0].family_index; -+ big_o.per_family_instance_index = reports[0].per_family_instance_index; - big_o.run_type = BenchmarkReporter::Run::RT_Aggregate; - big_o.repetitions = reports[0].repetitions; - big_o.repetition_index = Run::no_repetition_index; - big_o.threads = reports[0].threads; - big_o.aggregate_name = "BigO"; -+ big_o.aggregate_unit = StatisticUnit::kTime; - big_o.report_label = reports[0].report_label; - big_o.iterations = 0; - big_o.real_accumulated_time = result_real.coef; -@@ -215,8 +217,11 @@ std::vector ComputeBigO( - // Only add label to mean/stddev if it is same for all runs - Run rms; - rms.run_name = run_name; -+ rms.family_index = reports[0].family_index; -+ rms.per_family_instance_index = reports[0].per_family_instance_index; - rms.run_type = BenchmarkReporter::Run::RT_Aggregate; - rms.aggregate_name = "RMS"; -+ rms.aggregate_unit = StatisticUnit::kPercentage; - rms.report_label = big_o.report_label; - rms.iterations = 0; - rms.repetition_index = Run::no_repetition_index; -diff --git a/lib/benchmark/src/console_reporter.cc b/lib/benchmark/src/console_reporter.cc -index 6fd764525e..61c34da822 100644 ---- a/lib/benchmark/src/console_reporter.cc -+++ b/lib/benchmark/src/console_reporter.cc -@@ -142,10 +142,16 @@ void ConsoleReporter::PrintRunData(const Run& result) { - } else if (result.report_rms) { - printer(Out, COLOR_YELLOW, "%10.0f %-4s %10.0f %-4s ", real_time * 100, "%", - cpu_time * 100, "%"); -- } else { -+ } else if (result.run_type != Run::RT_Aggregate || -+ result.aggregate_unit == StatisticUnit::kTime) { - const char* timeLabel = GetTimeUnitString(result.time_unit); - printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(), timeLabel, - cpu_time_str.c_str(), timeLabel); -+ } else { -+ assert(result.aggregate_unit == StatisticUnit::kPercentage); -+ printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", -+ (100. * result.real_accumulated_time), "%", -+ (100. * result.cpu_accumulated_time), "%"); - } - - if (!result.report_big_o && !result.report_rms) { -@@ -155,10 +161,17 @@ void ConsoleReporter::PrintRunData(const Run& result) { - for (auto& c : result.counters) { - const std::size_t cNameLen = std::max(std::string::size_type(10), - c.first.length()); -- auto const& s = HumanReadableNumber(c.second.value, c.second.oneK); -+ std::string s; - const char* unit = ""; -- if (c.second.flags & Counter::kIsRate) -- unit = (c.second.flags & Counter::kInvert) ? "s" : "/s"; -+ if (result.run_type == Run::RT_Aggregate && -+ result.aggregate_unit == StatisticUnit::kPercentage) { -+ s = StrFormat("%.2f", 100. * c.second.value); -+ unit = "%"; -+ } else { -+ s = HumanReadableNumber(c.second.value, c.second.oneK); -+ if (c.second.flags & Counter::kIsRate) -+ unit = (c.second.flags & Counter::kInvert) ? "s" : "/s"; -+ } - if (output_options_ & OO_Tabular) { - printer(Out, COLOR_DEFAULT, " %*s%s", cNameLen - strlen(unit), s.c_str(), - unit); -diff --git a/lib/benchmark/src/csv_reporter.cc b/lib/benchmark/src/csv_reporter.cc -index af2c18fc8a..9bd7121daf 100644 ---- a/lib/benchmark/src/csv_reporter.cc -+++ b/lib/benchmark/src/csv_reporter.cc -@@ -85,7 +85,8 @@ void CSVReporter::ReportRuns(const std::vector& reports) { - for (const auto& cnt : run.counters) { - if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second") - continue; -- CHECK(user_counter_names_.find(cnt.first) != user_counter_names_.end()) -+ BM_CHECK(user_counter_names_.find(cnt.first) != -+ user_counter_names_.end()) - << "All counters must be present in each run. " - << "Counter named \"" << cnt.first - << "\" was not in a run after being added to the header"; -diff --git a/lib/benchmark/src/cycleclock.h b/lib/benchmark/src/cycleclock.h -index d5d62c4c7f..f22ca9f7d2 100644 ---- a/lib/benchmark/src/cycleclock.h -+++ b/lib/benchmark/src/cycleclock.h -@@ -36,7 +36,7 @@ - // declarations of some other intrinsics, breaking compilation. - // Therefore, we simply declare __rdtsc ourselves. See also - // http://connect.microsoft.com/VisualStudio/feedback/details/262047 --#if defined(COMPILER_MSVC) && !defined(_M_IX86) -+#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) - extern "C" uint64_t __rdtsc(); - #pragma intrinsic(__rdtsc) - #endif -@@ -84,13 +84,21 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { - return (high << 32) | low; - #elif defined(__powerpc__) || defined(__ppc__) - // This returns a time-base, which is not always precisely a cycle-count. -- int64_t tbl, tbu0, tbu1; -- asm("mftbu %0" : "=r"(tbu0)); -- asm("mftb %0" : "=r"(tbl)); -- asm("mftbu %0" : "=r"(tbu1)); -- tbl &= -static_cast(tbu0 == tbu1); -- // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) -- return (tbu1 << 32) | tbl; -+#if defined(__powerpc64__) || defined(__ppc64__) -+ int64_t tb; -+ asm volatile("mfspr %0, 268" : "=r"(tb)); -+ return tb; -+#else -+ uint32_t tbl, tbu0, tbu1; -+ asm volatile( -+ "mftbu %0\n" -+ "mftb %1\n" -+ "mftbu %2" -+ : "=r"(tbu0), "=r"(tbl), "=r"(tbu1)); -+ tbl &= -static_cast(tbu0 == tbu1); -+ // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is no longer needed) -+ return (static_cast(tbu1) << 32) | tbl; -+#endif - #elif defined(__sparc__) - int64_t tick; - asm(".byte 0x83, 0x41, 0x00, 0x00"); -@@ -106,6 +114,12 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { - // when I know it will work. Otherwise, I'll use __rdtsc and hope - // the code is being compiled with a non-ancient compiler. - _asm rdtsc -+#elif defined(COMPILER_MSVC) && defined(_M_ARM64) -+ // See https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=vs-2019 -+ // and https://reviews.llvm.org/D53115 -+ int64_t virtual_timer_value; -+ virtual_timer_value = _ReadStatusReg(ARM64_CNTVCT); -+ return virtual_timer_value; - #elif defined(COMPILER_MSVC) - return __rdtsc(); - #elif defined(BENCHMARK_OS_NACL) -@@ -153,32 +167,51 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { - struct timeval tv; - gettimeofday(&tv, nullptr); - return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; --#elif defined(__mips__) -+#elif defined(__mips__) || defined(__m68k__) - // mips apparently only allows rdtsc for superusers, so we fall - // back to gettimeofday. It's possible clock_gettime would be better. - struct timeval tv; - gettimeofday(&tv, nullptr); - return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; -+#elif defined(__loongarch__) -+ struct timeval tv; -+ gettimeofday(&tv, nullptr); -+ return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; - #elif defined(__s390__) // Covers both s390 and s390x. - // Return the CPU clock. - uint64_t tsc; -+#if defined(BENCHMARK_OS_ZOS) && defined(COMPILER_IBMXL) -+ // z/OS XL compiler HLASM syntax. -+ asm(" stck %0" : "=m"(tsc) : : "cc"); -+#else - asm("stck %0" : "=Q"(tsc) : : "cc"); -+#endif - return tsc; - #elif defined(__riscv) // RISC-V - // Use RDCYCLE (and RDCYCLEH on riscv32) - #if __riscv_xlen == 32 -- uint64_t cycles_low, cycles_hi0, cycles_hi1; -- asm("rdcycleh %0" : "=r"(cycles_hi0)); -- asm("rdcycle %0" : "=r"(cycles_lo)); -- asm("rdcycleh %0" : "=r"(cycles_hi1)); -- // This matches the PowerPC overflow detection, above -- cycles_lo &= -static_cast(cycles_hi0 == cycles_hi1); -- return (cycles_hi1 << 32) | cycles_lo; -+ uint32_t cycles_lo, cycles_hi0, cycles_hi1; -+ // This asm also includes the PowerPC overflow handling strategy, as above. -+ // Implemented in assembly because Clang insisted on branching. -+ asm volatile( -+ "rdcycleh %0\n" -+ "rdcycle %1\n" -+ "rdcycleh %2\n" -+ "sub %0, %0, %2\n" -+ "seqz %0, %0\n" -+ "sub %0, zero, %0\n" -+ "and %1, %1, %0\n" -+ : "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1)); -+ return (static_cast(cycles_hi1) << 32) | cycles_lo; - #else - uint64_t cycles; -- asm("rdcycle %0" : "=r"(cycles)); -+ asm volatile("rdcycle %0" : "=r"(cycles)); - return cycles; - #endif -+#elif defined(__e2k__) || defined(__elbrus__) -+ struct timeval tv; -+ gettimeofday(&tv, nullptr); -+ return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; - #else - // The soft failover to a generic implementation is automatic only for ARM. - // For other platforms the developer is expected to make an attempt to create -diff --git a/lib/benchmark/src/internal_macros.h b/lib/benchmark/src/internal_macros.h -index 6adf00d056..91f367b894 100644 ---- a/lib/benchmark/src/internal_macros.h -+++ b/lib/benchmark/src/internal_macros.h -@@ -13,7 +13,11 @@ - #endif - - #if defined(__clang__) -- #if !defined(COMPILER_CLANG) -+ #if defined(__ibmxl__) -+ #if !defined(COMPILER_IBMXL) -+ #define COMPILER_IBMXL -+ #endif -+ #elif !defined(COMPILER_CLANG) - #define COMPILER_CLANG - #endif - #elif defined(_MSC_VER) -@@ -58,6 +62,8 @@ - #define BENCHMARK_OS_NETBSD 1 - #elif defined(__OpenBSD__) - #define BENCHMARK_OS_OPENBSD 1 -+#elif defined(__DragonFly__) -+ #define BENCHMARK_OS_DRAGONFLY 1 - #elif defined(__linux__) - #define BENCHMARK_OS_LINUX 1 - #elif defined(__native_client__) -@@ -72,6 +78,8 @@ - #define BENCHMARK_OS_SOLARIS 1 - #elif defined(__QNX__) - #define BENCHMARK_OS_QNX 1 -+#elif defined(__MVS__) -+#define BENCHMARK_OS_ZOS 1 - #endif - - #if defined(__ANDROID__) && defined(__GLIBCXX__) -diff --git a/lib/benchmark/src/json_reporter.cc b/lib/benchmark/src/json_reporter.cc -index e5f3c35248..22d5ce021c 100644 ---- a/lib/benchmark/src/json_reporter.cc -+++ b/lib/benchmark/src/json_reporter.cc -@@ -12,9 +12,6 @@ - // See the License for the specific language governing permissions and - // limitations under the License. - --#include "benchmark/benchmark.h" --#include "complexity.h" -- - #include - #include - #include -@@ -25,41 +22,65 @@ - #include - #include - -+#include "benchmark/benchmark.h" -+#include "complexity.h" - #include "string_util.h" - #include "timers.h" - - namespace benchmark { -+namespace internal { -+extern std::map* global_context; -+} - - namespace { - --std::string StrEscape(const std::string & s) { -+std::string StrEscape(const std::string& s) { - std::string tmp; - tmp.reserve(s.size()); - for (char c : s) { - switch (c) { -- case '\b': tmp += "\\b"; break; -- case '\f': tmp += "\\f"; break; -- case '\n': tmp += "\\n"; break; -- case '\r': tmp += "\\r"; break; -- case '\t': tmp += "\\t"; break; -- case '\\': tmp += "\\\\"; break; -- case '"' : tmp += "\\\""; break; -- default : tmp += c; break; -+ case '\b': -+ tmp += "\\b"; -+ break; -+ case '\f': -+ tmp += "\\f"; -+ break; -+ case '\n': -+ tmp += "\\n"; -+ break; -+ case '\r': -+ tmp += "\\r"; -+ break; -+ case '\t': -+ tmp += "\\t"; -+ break; -+ case '\\': -+ tmp += "\\\\"; -+ break; -+ case '"': -+ tmp += "\\\""; -+ break; -+ default: -+ tmp += c; -+ break; - } - } - return tmp; - } - - std::string FormatKV(std::string const& key, std::string const& value) { -- return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str()); -+ return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), -+ StrEscape(value).c_str()); - } - - std::string FormatKV(std::string const& key, const char* value) { -- return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), StrEscape(value).c_str()); -+ return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(), -+ StrEscape(value).c_str()); - } - - std::string FormatKV(std::string const& key, bool value) { -- return StrFormat("\"%s\": %s", StrEscape(key).c_str(), value ? "true" : "false"); -+ return StrFormat("\"%s\": %s", StrEscape(key).c_str(), -+ value ? "true" : "false"); - } - - std::string FormatKV(std::string const& key, int64_t value) { -@@ -122,8 +143,12 @@ bool JSONReporter::ReportContext(const Context& context) { - << FormatKV("mhz_per_cpu", - RoundDouble(info.cycles_per_second / 1000000.0)) - << ",\n"; -- out << indent << FormatKV("cpu_scaling_enabled", info.scaling_enabled) -- << ",\n"; -+ if (CPUInfo::Scaling::UNKNOWN != info.scaling) { -+ out << indent -+ << FormatKV("cpu_scaling_enabled", -+ info.scaling == CPUInfo::Scaling::ENABLED ? true : false) -+ << ",\n"; -+ } - - out << indent << "\"caches\": [\n"; - indent = std::string(6, ' '); -@@ -134,8 +159,8 @@ bool JSONReporter::ReportContext(const Context& context) { - out << cache_indent << FormatKV("type", CI.type) << ",\n"; - out << cache_indent << FormatKV("level", static_cast(CI.level)) - << ",\n"; -- out << cache_indent -- << FormatKV("size", static_cast(CI.size)) << ",\n"; -+ out << cache_indent << FormatKV("size", static_cast(CI.size)) -+ << ",\n"; - out << cache_indent - << FormatKV("num_sharing", static_cast(CI.num_sharing)) - << "\n"; -@@ -157,7 +182,16 @@ bool JSONReporter::ReportContext(const Context& context) { - #else - const char build_type[] = "debug"; - #endif -- out << indent << FormatKV("library_build_type", build_type) << "\n"; -+ out << indent << FormatKV("library_build_type", build_type); -+ -+ if (internal::global_context != nullptr) { -+ for (const auto& kv : *internal::global_context) { -+ out << ",\n"; -+ out << indent << FormatKV(kv.first, kv.second); -+ } -+ } -+ out << "\n"; -+ - // Close context block and open the list of benchmarks. - out << inner_indent << "},\n"; - out << inner_indent << "\"benchmarks\": [\n"; -@@ -195,6 +229,10 @@ void JSONReporter::PrintRunData(Run const& run) { - std::string indent(6, ' '); - std::ostream& out = GetOutputStream(); - out << indent << FormatKV("name", run.benchmark_name()) << ",\n"; -+ out << indent << FormatKV("family_index", run.family_index) << ",\n"; -+ out << indent -+ << FormatKV("per_family_instance_index", run.per_family_instance_index) -+ << ",\n"; - out << indent << FormatKV("run_name", run.run_name.str()) << ",\n"; - out << indent << FormatKV("run_type", [&run]() -> const char* { - switch (run.run_type) { -@@ -213,6 +251,15 @@ void JSONReporter::PrintRunData(Run const& run) { - out << indent << FormatKV("threads", run.threads) << ",\n"; - if (run.run_type == BenchmarkReporter::Run::RT_Aggregate) { - out << indent << FormatKV("aggregate_name", run.aggregate_name) << ",\n"; -+ out << indent << FormatKV("aggregate_unit", [&run]() -> const char* { -+ switch (run.aggregate_unit) { -+ case StatisticUnit::kTime: -+ return "time"; -+ case StatisticUnit::kPercentage: -+ return "percentage"; -+ } -+ BENCHMARK_UNREACHABLE(); -+ }()) << ",\n"; - } - if (run.error_occurred) { - out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n"; -@@ -220,8 +267,17 @@ void JSONReporter::PrintRunData(Run const& run) { - } - if (!run.report_big_o && !run.report_rms) { - out << indent << FormatKV("iterations", run.iterations) << ",\n"; -- out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) << ",\n"; -- out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime()); -+ if (run.run_type != Run::RT_Aggregate || -+ run.aggregate_unit == StatisticUnit::kTime) { -+ out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) -+ << ",\n"; -+ out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime()); -+ } else { -+ assert(run.aggregate_unit == StatisticUnit::kPercentage); -+ out << indent << FormatKV("real_time", run.real_accumulated_time) -+ << ",\n"; -+ out << indent << FormatKV("cpu_time", run.cpu_accumulated_time); -+ } - out << ",\n" - << indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit)); - } else if (run.report_big_o) { -diff --git a/lib/benchmark/src/log.h b/lib/benchmark/src/log.h -index 47d0c35c01..48c071aded 100644 ---- a/lib/benchmark/src/log.h -+++ b/lib/benchmark/src/log.h -@@ -67,7 +67,7 @@ inline LogType& GetLogInstanceForLevel(int level) { - } // end namespace benchmark - - // clang-format off --#define VLOG(x) \ -+#define BM_VLOG(x) \ - (::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \ - " ") - // clang-format on -diff --git a/lib/benchmark/src/mutex.h b/lib/benchmark/src/mutex.h -index 3fac79aea4..bec78d9e5f 100644 ---- a/lib/benchmark/src/mutex.h -+++ b/lib/benchmark/src/mutex.h -@@ -9,60 +9,60 @@ - // Enable thread safety attributes only with clang. - // The attributes can be safely erased when compiling with other compilers. - #if defined(HAVE_THREAD_SAFETY_ATTRIBUTES) --#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x)) -+#define THREAD_ANNOTATION_ATTRIBUTE_(x) __attribute__((x)) - #else --#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op -+#define THREAD_ANNOTATION_ATTRIBUTE_(x) // no-op - #endif - --#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(capability(x)) -+#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(capability(x)) - --#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable) -+#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE_(scoped_lockable) - --#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x)) -+#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(guarded_by(x)) - --#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x)) -+#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(pt_guarded_by(x)) - - #define ACQUIRED_BEFORE(...) \ -- THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__)) -+ THREAD_ANNOTATION_ATTRIBUTE_(acquired_before(__VA_ARGS__)) - - #define ACQUIRED_AFTER(...) \ -- THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__)) -+ THREAD_ANNOTATION_ATTRIBUTE_(acquired_after(__VA_ARGS__)) - - #define REQUIRES(...) \ -- THREAD_ANNOTATION_ATTRIBUTE__(requires_capability(__VA_ARGS__)) -+ THREAD_ANNOTATION_ATTRIBUTE_(requires_capability(__VA_ARGS__)) - - #define REQUIRES_SHARED(...) \ -- THREAD_ANNOTATION_ATTRIBUTE__(requires_shared_capability(__VA_ARGS__)) -+ THREAD_ANNOTATION_ATTRIBUTE_(requires_shared_capability(__VA_ARGS__)) - - #define ACQUIRE(...) \ -- THREAD_ANNOTATION_ATTRIBUTE__(acquire_capability(__VA_ARGS__)) -+ THREAD_ANNOTATION_ATTRIBUTE_(acquire_capability(__VA_ARGS__)) - - #define ACQUIRE_SHARED(...) \ -- THREAD_ANNOTATION_ATTRIBUTE__(acquire_shared_capability(__VA_ARGS__)) -+ THREAD_ANNOTATION_ATTRIBUTE_(acquire_shared_capability(__VA_ARGS__)) - - #define RELEASE(...) \ -- THREAD_ANNOTATION_ATTRIBUTE__(release_capability(__VA_ARGS__)) -+ THREAD_ANNOTATION_ATTRIBUTE_(release_capability(__VA_ARGS__)) - - #define RELEASE_SHARED(...) \ -- THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__)) -+ THREAD_ANNOTATION_ATTRIBUTE_(release_shared_capability(__VA_ARGS__)) - - #define TRY_ACQUIRE(...) \ -- THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__)) -+ THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_capability(__VA_ARGS__)) - - #define TRY_ACQUIRE_SHARED(...) \ -- THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__)) -+ THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_shared_capability(__VA_ARGS__)) - --#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__)) -+#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE_(locks_excluded(__VA_ARGS__)) - --#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(x)) -+#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(assert_capability(x)) - - #define ASSERT_SHARED_CAPABILITY(x) \ -- THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_capability(x)) -+ THREAD_ANNOTATION_ATTRIBUTE_(assert_shared_capability(x)) - --#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x)) -+#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(lock_returned(x)) - - #define NO_THREAD_SAFETY_ANALYSIS \ -- THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis) -+ THREAD_ANNOTATION_ATTRIBUTE_(no_thread_safety_analysis) - - namespace benchmark { - -@@ -130,7 +130,7 @@ class Barrier { - // entered the barrier. Returns iff this is the last thread to - // enter the barrier. - bool createBarrier(MutexLock& ml) REQUIRES(lock_) { -- CHECK_LT(entered_, running_threads_); -+ BM_CHECK_LT(entered_, running_threads_); - entered_++; - if (entered_ < running_threads_) { - // Wait for all threads to enter -diff --git a/lib/benchmark/src/perf_counters.cc b/lib/benchmark/src/perf_counters.cc -new file mode 100644 -index 0000000000..4ddf0de250 ---- /dev/null -+++ b/lib/benchmark/src/perf_counters.cc -@@ -0,0 +1,132 @@ -+// Copyright 2021 Google Inc. All rights reserved. -+// -+// Licensed under the Apache License, Version 2.0 (the "License"); -+// you may not use this file except in compliance with the License. -+// You may obtain a copy of the License at -+// -+// http://www.apache.org/licenses/LICENSE-2.0 -+// -+// Unless required by applicable law or agreed to in writing, software -+// distributed under the License is distributed on an "AS IS" BASIS, -+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+// See the License for the specific language governing permissions and -+// limitations under the License. -+ -+#include "perf_counters.h" -+ -+#include -+#include -+ -+#if defined HAVE_LIBPFM -+#include "perfmon/pfmlib.h" -+#include "perfmon/pfmlib_perf_event.h" -+#endif -+ -+namespace benchmark { -+namespace internal { -+ -+constexpr size_t PerfCounterValues::kMaxCounters; -+ -+#if defined HAVE_LIBPFM -+const bool PerfCounters::kSupported = true; -+ -+bool PerfCounters::Initialize() { return pfm_initialize() == PFM_SUCCESS; } -+ -+PerfCounters PerfCounters::Create( -+ const std::vector& counter_names) { -+ if (counter_names.empty()) { -+ return NoCounters(); -+ } -+ if (counter_names.size() > PerfCounterValues::kMaxCounters) { -+ GetErrorLogInstance() -+ << counter_names.size() -+ << " counters were requested. The minimum is 1, the maximum is " -+ << PerfCounterValues::kMaxCounters << "\n"; -+ return NoCounters(); -+ } -+ std::vector counter_ids(counter_names.size()); -+ -+ const int mode = PFM_PLM3; // user mode only -+ for (size_t i = 0; i < counter_names.size(); ++i) { -+ const bool is_first = i == 0; -+ struct perf_event_attr attr{}; -+ attr.size = sizeof(attr); -+ const int group_id = !is_first ? counter_ids[0] : -1; -+ const auto& name = counter_names[i]; -+ if (name.empty()) { -+ GetErrorLogInstance() << "A counter name was the empty string\n"; -+ return NoCounters(); -+ } -+ pfm_perf_encode_arg_t arg{}; -+ arg.attr = &attr; -+ -+ const int pfm_get = -+ pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT, &arg); -+ if (pfm_get != PFM_SUCCESS) { -+ GetErrorLogInstance() << "Unknown counter name: " << name << "\n"; -+ return NoCounters(); -+ } -+ attr.disabled = is_first; -+ // Note: the man page for perf_event_create suggests inerit = true and -+ // read_format = PERF_FORMAT_GROUP don't work together, but that's not the -+ // case. -+ attr.inherit = true; -+ attr.pinned = is_first; -+ attr.exclude_kernel = true; -+ attr.exclude_user = false; -+ attr.exclude_hv = true; -+ // Read all counters in one read. -+ attr.read_format = PERF_FORMAT_GROUP; -+ -+ int id = -1; -+ static constexpr size_t kNrOfSyscallRetries = 5; -+ // Retry syscall as it was interrupted often (b/64774091). -+ for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries; -+ ++num_retries) { -+ id = perf_event_open(&attr, 0, -1, group_id, 0); -+ if (id >= 0 || errno != EINTR) { -+ break; -+ } -+ } -+ if (id < 0) { -+ GetErrorLogInstance() -+ << "Failed to get a file descriptor for " << name << "\n"; -+ return NoCounters(); -+ } -+ -+ counter_ids[i] = id; -+ } -+ if (ioctl(counter_ids[0], PERF_EVENT_IOC_ENABLE) != 0) { -+ GetErrorLogInstance() << "Failed to start counters\n"; -+ return NoCounters(); -+ } -+ -+ return PerfCounters(counter_names, std::move(counter_ids)); -+} -+ -+PerfCounters::~PerfCounters() { -+ if (counter_ids_.empty()) { -+ return; -+ } -+ ioctl(counter_ids_[0], PERF_EVENT_IOC_DISABLE); -+ for (int fd : counter_ids_) { -+ close(fd); -+ } -+} -+#else // defined HAVE_LIBPFM -+const bool PerfCounters::kSupported = false; -+ -+bool PerfCounters::Initialize() { return false; } -+ -+PerfCounters PerfCounters::Create( -+ const std::vector& counter_names) { -+ if (!counter_names.empty()) { -+ GetErrorLogInstance() << "Performance counters not supported."; -+ } -+ return NoCounters(); -+} -+ -+PerfCounters::~PerfCounters() = default; -+#endif // defined HAVE_LIBPFM -+} // namespace internal -+} // namespace benchmark -diff --git a/lib/benchmark/src/perf_counters.h b/lib/benchmark/src/perf_counters.h -new file mode 100644 -index 0000000000..47ca1385e2 ---- /dev/null -+++ b/lib/benchmark/src/perf_counters.h -@@ -0,0 +1,172 @@ -+// Copyright 2021 Google Inc. All rights reserved. -+// -+// Licensed under the Apache License, Version 2.0 (the "License"); -+// you may not use this file except in compliance with the License. -+// You may obtain a copy of the License at -+// -+// http://www.apache.org/licenses/LICENSE-2.0 -+// -+// Unless required by applicable law or agreed to in writing, software -+// distributed under the License is distributed on an "AS IS" BASIS, -+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+// See the License for the specific language governing permissions and -+// limitations under the License. -+ -+#ifndef BENCHMARK_PERF_COUNTERS_H -+#define BENCHMARK_PERF_COUNTERS_H -+ -+#include -+#include -+#include -+ -+#include "benchmark/benchmark.h" -+#include "check.h" -+#include "log.h" -+ -+#ifndef BENCHMARK_OS_WINDOWS -+#include -+#endif -+ -+namespace benchmark { -+namespace internal { -+ -+// Typically, we can only read a small number of counters. There is also a -+// padding preceding counter values, when reading multiple counters with one -+// syscall (which is desirable). PerfCounterValues abstracts these details. -+// The implementation ensures the storage is inlined, and allows 0-based -+// indexing into the counter values. -+// The object is used in conjunction with a PerfCounters object, by passing it -+// to Snapshot(). The values are populated such that -+// perfCounters->names()[i]'s value is obtained at position i (as given by -+// operator[]) of this object. -+class PerfCounterValues { -+ public: -+ explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) { -+ BM_CHECK_LE(nr_counters_, kMaxCounters); -+ } -+ -+ uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; } -+ -+ static constexpr size_t kMaxCounters = 3; -+ -+ private: -+ friend class PerfCounters; -+ // Get the byte buffer in which perf counters can be captured. -+ // This is used by PerfCounters::Read -+ std::pair get_data_buffer() { -+ return {reinterpret_cast(values_.data()), -+ sizeof(uint64_t) * (kPadding + nr_counters_)}; -+ } -+ -+ static constexpr size_t kPadding = 1; -+ std::array values_; -+ const size_t nr_counters_; -+}; -+ -+// Collect PMU counters. The object, once constructed, is ready to be used by -+// calling read(). PMU counter collection is enabled from the time create() is -+// called, to obtain the object, until the object's destructor is called. -+class PerfCounters final { -+ public: -+ // True iff this platform supports performance counters. -+ static const bool kSupported; -+ -+ bool IsValid() const { return is_valid_; } -+ static PerfCounters NoCounters() { return PerfCounters(); } -+ -+ ~PerfCounters(); -+ PerfCounters(PerfCounters&&) = default; -+ PerfCounters(const PerfCounters&) = delete; -+ -+ // Platform-specific implementations may choose to do some library -+ // initialization here. -+ static bool Initialize(); -+ -+ // Return a PerfCounters object ready to read the counters with the names -+ // specified. The values are user-mode only. The counter name format is -+ // implementation and OS specific. -+ // TODO: once we move to C++-17, this should be a std::optional, and then the -+ // IsValid() boolean can be dropped. -+ static PerfCounters Create(const std::vector& counter_names); -+ -+ // Take a snapshot of the current value of the counters into the provided -+ // valid PerfCounterValues storage. The values are populated such that: -+ // names()[i]'s value is (*values)[i] -+ BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const { -+#ifndef BENCHMARK_OS_WINDOWS -+ assert(values != nullptr); -+ assert(IsValid()); -+ auto buffer = values->get_data_buffer(); -+ auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second); -+ return static_cast(read_bytes) == buffer.second; -+#else -+ (void)values; -+ return false; -+#endif -+ } -+ -+ const std::vector& names() const { return counter_names_; } -+ size_t num_counters() const { return counter_names_.size(); } -+ -+ private: -+ PerfCounters(const std::vector& counter_names, -+ std::vector&& counter_ids) -+ : counter_ids_(std::move(counter_ids)), -+ counter_names_(counter_names), -+ is_valid_(true) {} -+ PerfCounters() : is_valid_(false) {} -+ -+ std::vector counter_ids_; -+ const std::vector counter_names_; -+ const bool is_valid_; -+}; -+ -+// Typical usage of the above primitives. -+class PerfCountersMeasurement final { -+ public: -+ PerfCountersMeasurement(PerfCounters&& c) -+ : counters_(std::move(c)), -+ start_values_(counters_.IsValid() ? counters_.names().size() : 0), -+ end_values_(counters_.IsValid() ? counters_.names().size() : 0) {} -+ -+ bool IsValid() const { return counters_.IsValid(); } -+ -+ BENCHMARK_ALWAYS_INLINE void Start() { -+ assert(IsValid()); -+ // Tell the compiler to not move instructions above/below where we take -+ // the snapshot. -+ ClobberMemory(); -+ counters_.Snapshot(&start_values_); -+ ClobberMemory(); -+ } -+ -+ BENCHMARK_ALWAYS_INLINE std::vector> -+ StopAndGetMeasurements() { -+ assert(IsValid()); -+ // Tell the compiler to not move instructions above/below where we take -+ // the snapshot. -+ ClobberMemory(); -+ counters_.Snapshot(&end_values_); -+ ClobberMemory(); -+ -+ std::vector> ret; -+ for (size_t i = 0; i < counters_.names().size(); ++i) { -+ double measurement = static_cast(end_values_[i]) - -+ static_cast(start_values_[i]); -+ ret.push_back({counters_.names()[i], measurement}); -+ } -+ return ret; -+ } -+ -+ private: -+ PerfCounters counters_; -+ PerfCounterValues start_values_; -+ PerfCounterValues end_values_; -+}; -+ -+BENCHMARK_UNUSED static bool perf_init_anchor = PerfCounters::Initialize(); -+ -+} // namespace internal -+} // namespace benchmark -+ -+#endif // BENCHMARK_PERF_COUNTERS_H -diff --git a/lib/benchmark/src/re.h b/lib/benchmark/src/re.h -index fbe25037b4..630046782d 100644 ---- a/lib/benchmark/src/re.h -+++ b/lib/benchmark/src/re.h -@@ -126,7 +126,7 @@ inline bool Regex::Init(const std::string& spec, std::string* error) { - - // regerror returns the number of bytes necessary to null terminate - // the string, so we move that when assigning to error. -- CHECK_NE(needed, 0); -+ BM_CHECK_NE(needed, 0); - error->assign(errbuf, needed - 1); - - delete[] errbuf; -diff --git a/lib/benchmark/src/reporter.cc b/lib/benchmark/src/reporter.cc -index 0b54fa421a..c720a9df1d 100644 ---- a/lib/benchmark/src/reporter.cc -+++ b/lib/benchmark/src/reporter.cc -@@ -18,6 +18,8 @@ - #include - - #include -+#include -+#include - #include - #include - -@@ -25,6 +27,9 @@ - #include "string_util.h" - - namespace benchmark { -+namespace internal { -+extern std::map* global_context; -+} - - BenchmarkReporter::BenchmarkReporter() - : output_stream_(&std::cout), error_stream_(&std::cerr) {} -@@ -33,7 +38,7 @@ BenchmarkReporter::~BenchmarkReporter() {} - - void BenchmarkReporter::PrintBasicContext(std::ostream *out, - Context const &context) { -- CHECK(out) << "cannot be null"; -+ BM_CHECK(out) << "cannot be null"; - auto &Out = *out; - - Out << LocalDateTimeString() << "\n"; -@@ -64,7 +69,13 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out, - Out << "\n"; - } - -- if (info.scaling_enabled) { -+ if (internal::global_context != nullptr) { -+ for (const auto& kv: *internal::global_context) { -+ Out << kv.first << ": " << kv.second << "\n"; -+ } -+ } -+ -+ if (CPUInfo::Scaling::ENABLED == info.scaling) { - Out << "***WARNING*** CPU scaling is enabled, the benchmark " - "real time measurements may be noisy and will incur extra " - "overhead.\n"; -diff --git a/lib/benchmark/src/sleep.cc b/lib/benchmark/src/sleep.cc -index 1512ac90f7..4609d540ea 100644 ---- a/lib/benchmark/src/sleep.cc -+++ b/lib/benchmark/src/sleep.cc -@@ -24,6 +24,10 @@ - #include - #endif - -+#ifdef BENCHMARK_OS_ZOS -+#include -+#endif -+ - namespace benchmark { - #ifdef BENCHMARK_OS_WINDOWS - // Window's Sleep takes milliseconds argument. -@@ -33,11 +37,23 @@ void SleepForSeconds(double seconds) { - } - #else // BENCHMARK_OS_WINDOWS - void SleepForMicroseconds(int microseconds) { -+#ifdef BENCHMARK_OS_ZOS -+ // z/OS does not support nanosleep. Instead call sleep() and then usleep() to -+ // sleep for the remaining microseconds because usleep() will fail if its -+ // argument is greater than 1000000. -+ div_t sleepTime = div(microseconds, kNumMicrosPerSecond); -+ int seconds = sleepTime.quot; -+ while (seconds != 0) -+ seconds = sleep(seconds); -+ while (usleep(sleepTime.rem) == -1 && errno == EINTR) -+ ; -+#else - struct timespec sleep_time; - sleep_time.tv_sec = microseconds / kNumMicrosPerSecond; - sleep_time.tv_nsec = (microseconds % kNumMicrosPerSecond) * kNumNanosPerMicro; - while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) - ; // Ignore signals and wait for the full interval to elapse. -+#endif - } - - void SleepForMilliseconds(int milliseconds) { -diff --git a/lib/benchmark/src/statistics.cc b/lib/benchmark/src/statistics.cc -index bd5a3d6597..00ae97dfa8 100644 ---- a/lib/benchmark/src/statistics.cc -+++ b/lib/benchmark/src/statistics.cc -@@ -74,6 +74,15 @@ double StatisticsStdDev(const std::vector& v) { - return Sqrt(v.size() / (v.size() - 1.0) * (avg_squares - Sqr(mean))); - } - -+double StatisticsCV(const std::vector& v) { -+ if (v.size() < 2) return 0.0; -+ -+ const auto stddev = StatisticsStdDev(v); -+ const auto mean = StatisticsMean(v); -+ -+ return stddev / mean; -+} -+ - std::vector ComputeStats( - const std::vector& reports) { - typedef BenchmarkReporter::Run Run; -@@ -112,22 +121,22 @@ std::vector ComputeStats( - it = counter_stats.find(cnt.first); - it->second.s.reserve(reports.size()); - } else { -- CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags); -+ BM_CHECK_EQ(counter_stats[cnt.first].c.flags, cnt.second.flags); - } - } - } - - // Populate the accumulators. - for (Run const& run : reports) { -- CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name()); -- CHECK_EQ(run_iterations, run.iterations); -+ BM_CHECK_EQ(reports[0].benchmark_name(), run.benchmark_name()); -+ BM_CHECK_EQ(run_iterations, run.iterations); - if (run.error_occurred) continue; - real_accumulated_time_stat.emplace_back(run.real_accumulated_time); - cpu_accumulated_time_stat.emplace_back(run.cpu_accumulated_time); - // user counters - for (auto const& cnt : run.counters) { - auto it = counter_stats.find(cnt.first); -- CHECK_NE(it, counter_stats.end()); -+ BM_CHECK_NE(it, counter_stats.end()); - it->second.s.emplace_back(cnt.second); - } - } -@@ -148,11 +157,14 @@ std::vector ComputeStats( - // Get the data from the accumulator to BenchmarkReporter::Run's. - Run data; - data.run_name = reports[0].run_name; -+ data.family_index = reports[0].family_index; -+ data.per_family_instance_index = reports[0].per_family_instance_index; - data.run_type = BenchmarkReporter::Run::RT_Aggregate; - data.threads = reports[0].threads; - data.repetitions = reports[0].repetitions; - data.repetition_index = Run::no_repetition_index; - data.aggregate_name = Stat.name_; -+ data.aggregate_unit = Stat.unit_; - data.report_label = report_label; - - // It is incorrect to say that an aggregate is computed over -@@ -165,13 +177,15 @@ std::vector ComputeStats( - data.real_accumulated_time = Stat.compute_(real_accumulated_time_stat); - data.cpu_accumulated_time = Stat.compute_(cpu_accumulated_time_stat); - -- // We will divide these times by data.iterations when reporting, but the -- // data.iterations is not nessesairly the scale of these measurements, -- // because in each repetition, these timers are sum over all the iterations. -- // And if we want to say that the stats are over N repetitions and not -- // M iterations, we need to multiply these by (N/M). -- data.real_accumulated_time *= iteration_rescale_factor; -- data.cpu_accumulated_time *= iteration_rescale_factor; -+ if (data.aggregate_unit == StatisticUnit::kTime) { -+ // We will divide these times by data.iterations when reporting, but the -+ // data.iterations is not necessarily the scale of these measurements, -+ // because in each repetition, these timers are sum over all the iters. -+ // And if we want to say that the stats are over N repetitions and not -+ // M iterations, we need to multiply these by (N/M). -+ data.real_accumulated_time *= iteration_rescale_factor; -+ data.cpu_accumulated_time *= iteration_rescale_factor; -+ } - - data.time_unit = reports[0].time_unit; - -diff --git a/lib/benchmark/src/statistics.h b/lib/benchmark/src/statistics.h -index 7eccc85536..a9545a58c6 100644 ---- a/lib/benchmark/src/statistics.h -+++ b/lib/benchmark/src/statistics.h -@@ -31,6 +31,7 @@ std::vector ComputeStats( - double StatisticsMean(const std::vector& v); - double StatisticsMedian(const std::vector& v); - double StatisticsStdDev(const std::vector& v); -+double StatisticsCV(const std::vector& v); - - } // end namespace benchmark - -diff --git a/lib/benchmark/src/string_util.cc b/lib/benchmark/src/string_util.cc -index 39b01a1719..3551418174 100644 ---- a/lib/benchmark/src/string_util.cc -+++ b/lib/benchmark/src/string_util.cc -@@ -1,6 +1,9 @@ - #include "string_util.h" - - #include -+#ifdef BENCHMARK_STL_ANDROID_GNUSTL -+#include -+#endif - #include - #include - #include -@@ -160,6 +163,19 @@ std::string StrFormat(const char* format, ...) { - return tmp; - } - -+std::vector StrSplit(const std::string& str, char delim) { -+ if (str.empty()) return {}; -+ std::vector ret; -+ size_t first = 0; -+ size_t next = str.find(delim); -+ for (; next != std::string::npos; -+ first = next + 1, next = str.find(delim, first)) { -+ ret.push_back(str.substr(first, next - first)); -+ } -+ ret.push_back(str.substr(first)); -+ return ret; -+} -+ - #ifdef BENCHMARK_STL_ANDROID_GNUSTL - /* - * GNU STL in Android NDK lacks support for some C++11 functions, including -diff --git a/lib/benchmark/src/string_util.h b/lib/benchmark/src/string_util.h -index 09d7b4bd2a..6bc28b6912 100644 ---- a/lib/benchmark/src/string_util.h -+++ b/lib/benchmark/src/string_util.h -@@ -37,6 +37,8 @@ inline std::string StrCat(Args&&... args) { - return ss.str(); - } - -+std::vector StrSplit(const std::string& str, char delim); -+ - #ifdef BENCHMARK_STL_ANDROID_GNUSTL - /* - * GNU STL in Android NDK lacks support for some C++11 functions, including -diff --git a/lib/benchmark/src/sysinfo.cc b/lib/benchmark/src/sysinfo.cc -index 5b7c4af780..937604fe58 100644 ---- a/lib/benchmark/src/sysinfo.cc -+++ b/lib/benchmark/src/sysinfo.cc -@@ -29,7 +29,8 @@ - #include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD - #include - #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \ -- defined BENCHMARK_OS_NETBSD || defined BENCHMARK_OS_OPENBSD -+ defined BENCHMARK_OS_NETBSD || defined BENCHMARK_OS_OPENBSD || \ -+ defined BENCHMARK_OS_DRAGONFLY - #define BENCHMARK_HAS_SYSCTL - #include - #endif -@@ -57,6 +58,7 @@ - #include - #include - #include -+#include - - #include "check.h" - #include "cycleclock.h" -@@ -133,7 +135,7 @@ struct ValueUnion { - template - std::array GetAsArray() { - const int ArrSize = sizeof(T) * N; -- CHECK_LE(ArrSize, Size); -+ BM_CHECK_LE(ArrSize, Size); - std::array Arr; - std::memcpy(Arr.data(), data(), ArrSize); - return Arr; -@@ -209,13 +211,12 @@ bool ReadFromFile(std::string const& fname, ArgT* arg) { - return f.good(); - } - --bool CpuScalingEnabled(int num_cpus) { -+CPUInfo::Scaling CpuScaling(int num_cpus) { - // We don't have a valid CPU count, so don't even bother. -- if (num_cpus <= 0) return false; --#ifdef BENCHMARK_OS_QNX -- return false; --#endif --#ifndef BENCHMARK_OS_WINDOWS -+ if (num_cpus <= 0) return CPUInfo::Scaling::UNKNOWN; -+#if defined(BENCHMARK_OS_QNX) -+ return CPUInfo::Scaling::UNKNOWN; -+#elif !defined(BENCHMARK_OS_WINDOWS) - // On Linux, the CPUfreq subsystem exposes CPU information as files on the - // local file system. If reading the exported files fails, then we may not be - // running on Linux, so we silently ignore all the read errors. -@@ -223,10 +224,12 @@ bool CpuScalingEnabled(int num_cpus) { - for (int cpu = 0; cpu < num_cpus; ++cpu) { - std::string governor_file = - StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor"); -- if (ReadFromFile(governor_file, &res) && res != "performance") return true; -+ if (ReadFromFile(governor_file, &res) && res != "performance") return CPUInfo::Scaling::ENABLED; - } -+ return CPUInfo::Scaling::DISABLED; -+#else -+ return CPUInfo::Scaling::UNKNOWN; - #endif -- return false; - } - - int CountSetBitsInCPUMap(std::string Val) { -@@ -382,9 +385,11 @@ std::vector GetCacheSizesQNX() { - case CACHE_FLAG_UNIFIED : - info.type = "Unified"; - info.level = 2; -+ break; - case CACHE_FLAG_SHARED : - info.type = "Shared"; - info.level = 3; -+ break; - default : - continue; - break; -@@ -439,7 +444,7 @@ std::string GetSystemName() { - #elif defined(BENCHMARK_OS_RTEMS) - #define HOST_NAME_MAX 256 - #else --#warning "HOST_NAME_MAX not defined. using 64" -+#pragma message("HOST_NAME_MAX not defined. using 64") - #define HOST_NAME_MAX 64 - #endif - #endif // def HOST_NAME_MAX -@@ -525,7 +530,11 @@ int GetNumCPUs() { - BENCHMARK_UNREACHABLE(); - } - --double GetCPUCyclesPerSecond() { -+double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { -+ // Currently, scaling is only used on linux path here, -+ // suppress diagnostics about it being unused on other paths. -+ (void)scaling; -+ - #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN - long freq; - -@@ -536,8 +545,15 @@ double GetCPUCyclesPerSecond() { - // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as - // well. - if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq) -- // If CPU scaling is in effect, we want to use the *maximum* frequency, -- // not whatever CPU speed some random processor happens to be using now. -+ // If CPU scaling is disabled, use the *current* frequency. -+ // Note that we specifically don't want to read cpuinfo_cur_freq, -+ // because it is only readable by root. -+ || (scaling == CPUInfo::Scaling::DISABLED && -+ ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", -+ &freq)) -+ // Otherwise, if CPU scaling may be in effect, we want to use -+ // the *maximum* frequency, not whatever CPU speed some random processor -+ // happens to be using now. - || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", - &freq)) { - // The value is in kHz (as the file name suggests). For example, on a -@@ -603,6 +619,8 @@ double GetCPUCyclesPerSecond() { - "machdep.tsc_freq"; - #elif defined BENCHMARK_OS_OPENBSD - "hw.cpuspeed"; -+#elif defined BENCHMARK_OS_DRAGONFLY -+ "hw.tsc_frequency"; - #else - "hw.cpufrequency"; - #endif -@@ -667,9 +685,10 @@ double GetCPUCyclesPerSecond() { - } - - std::vector GetLoadAvg() { --#if (defined BENCHMARK_OS_FREEBSD || defined(BENCHMARK_OS_LINUX) || \ -- defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \ -- defined BENCHMARK_OS_OPENBSD) && !defined(__ANDROID__) -+#if (defined BENCHMARK_OS_FREEBSD || defined(BENCHMARK_OS_LINUX) || \ -+ defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD || \ -+ defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY) && \ -+ !defined(__ANDROID__) - constexpr int kMaxSamples = 3; - std::vector res(kMaxSamples, 0.0); - const int nelem = getloadavg(res.data(), kMaxSamples); -@@ -693,12 +712,11 @@ const CPUInfo& CPUInfo::Get() { - - CPUInfo::CPUInfo() - : num_cpus(GetNumCPUs()), -- cycles_per_second(GetCPUCyclesPerSecond()), -+ scaling(CpuScaling(num_cpus)), -+ cycles_per_second(GetCPUCyclesPerSecond(scaling)), - caches(GetCacheSizes()), -- scaling_enabled(CpuScalingEnabled(num_cpus)), - load_avg(GetLoadAvg()) {} - -- - const SystemInfo& SystemInfo::Get() { - static const SystemInfo* info = new SystemInfo(); - return *info; -diff --git a/lib/benchmark/src/thread_timer.h b/lib/benchmark/src/thread_timer.h -index 1703ca0d6f..eb23f59561 100644 ---- a/lib/benchmark/src/thread_timer.h -+++ b/lib/benchmark/src/thread_timer.h -@@ -28,7 +28,7 @@ class ThreadTimer { - - // Called by each thread - void StopTimer() { -- CHECK(running_); -+ BM_CHECK(running_); - running_ = false; - real_time_used_ += ChronoClockNow() - start_real_time_; - // Floating point error can result in the subtraction producing a negative -@@ -44,19 +44,19 @@ class ThreadTimer { - - // REQUIRES: timer is not running - double real_time_used() const { -- CHECK(!running_); -+ BM_CHECK(!running_); - return real_time_used_; - } - - // REQUIRES: timer is not running - double cpu_time_used() const { -- CHECK(!running_); -+ BM_CHECK(!running_); - return cpu_time_used_; - } - - // REQUIRES: timer is not running - double manual_time_used() const { -- CHECK(!running_); -+ BM_CHECK(!running_); - return manual_time_used_; - } - -diff --git a/lib/benchmark/src/timers.cc b/lib/benchmark/src/timers.cc -index 7613ff92c6..1f05574269 100644 ---- a/lib/benchmark/src/timers.cc -+++ b/lib/benchmark/src/timers.cc -@@ -28,7 +28,8 @@ - #include - #include // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD - #include --#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX -+#if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_DRAGONFLY || \ -+ defined BENCHMARK_OS_MACOSX - #include - #endif - #if defined(BENCHMARK_OS_MACOSX) -@@ -178,40 +179,75 @@ double ThreadCPUUsage() { - #endif - } - --namespace { -- --std::string DateTimeString(bool local) { -+std::string LocalDateTimeString() { -+ // Write the local time in RFC3339 format yyyy-mm-ddTHH:MM:SS+/-HH:MM. - typedef std::chrono::system_clock Clock; - std::time_t now = Clock::to_time_t(Clock::now()); -- const std::size_t kStorageSize = 128; -- char storage[kStorageSize]; -- std::size_t written; -+ const std::size_t kTzOffsetLen = 6; -+ const std::size_t kTimestampLen = 19; -+ -+ std::size_t tz_len; -+ std::size_t timestamp_len; -+ long int offset_minutes; -+ char tz_offset_sign = '+'; -+ // tz_offset is set in one of three ways: -+ // * strftime with %z - This either returns empty or the ISO 8601 time. The maximum length an -+ // ISO 8601 string can be is 7 (e.g. -03:30, plus trailing zero). -+ // * snprintf with %c%02li:%02li - The maximum length is 41 (one for %c, up to 19 for %02li, -+ // one for :, up to 19 %02li, plus trailing zero). -+ // * A fixed string of "-00:00". The maximum length is 7 (-00:00, plus trailing zero). -+ // -+ // Thus, the maximum size this needs to be is 41. -+ char tz_offset[41]; -+ // Long enough buffer to avoid format-overflow warnings -+ char storage[128]; - -- if (local) { - #if defined(BENCHMARK_OS_WINDOWS) -- written = -- std::strftime(storage, sizeof(storage), "%x %X", ::localtime(&now)); -+ std::tm *timeinfo_p = ::localtime(&now); - #else -- std::tm timeinfo; -- ::localtime_r(&now, &timeinfo); -- written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); -+ std::tm timeinfo; -+ std::tm *timeinfo_p = &timeinfo; -+ ::localtime_r(&now, &timeinfo); - #endif -+ -+ tz_len = std::strftime(tz_offset, sizeof(tz_offset), "%z", timeinfo_p); -+ -+ if (tz_len < kTzOffsetLen && tz_len > 1) { -+ // Timezone offset was written. strftime writes offset as +HHMM or -HHMM, -+ // RFC3339 specifies an offset as +HH:MM or -HH:MM. To convert, we parse -+ // the offset as an integer, then reprint it to a string. -+ -+ offset_minutes = ::strtol(tz_offset, NULL, 10); -+ if (offset_minutes < 0) { -+ offset_minutes *= -1; -+ tz_offset_sign = '-'; -+ } -+ -+ tz_len = ::snprintf(tz_offset, sizeof(tz_offset), "%c%02li:%02li", -+ tz_offset_sign, offset_minutes / 100, offset_minutes % 100); -+ BM_CHECK(tz_len == kTzOffsetLen); -+ ((void)tz_len); // Prevent unused variable warning in optimized build. - } else { -+ // Unknown offset. RFC3339 specifies that unknown local offsets should be -+ // written as UTC time with -00:00 timezone. - #if defined(BENCHMARK_OS_WINDOWS) -- written = std::strftime(storage, sizeof(storage), "%x %X", ::gmtime(&now)); -+ // Potential race condition if another thread calls localtime or gmtime. -+ timeinfo_p = ::gmtime(&now); - #else -- std::tm timeinfo; - ::gmtime_r(&now, &timeinfo); -- written = std::strftime(storage, sizeof(storage), "%F %T", &timeinfo); - #endif -+ -+ strncpy(tz_offset, "-00:00", kTzOffsetLen + 1); - } -- CHECK(written < kStorageSize); -- ((void)written); // prevent unused variable in optimized mode. -- return std::string(storage); --} - --} // end namespace -+ timestamp_len = std::strftime(storage, sizeof(storage), "%Y-%m-%dT%H:%M:%S", -+ timeinfo_p); -+ BM_CHECK(timestamp_len == kTimestampLen); -+ // Prevent unused variable warning in optimized build. -+ ((void)kTimestampLen); - --std::string LocalDateTimeString() { return DateTimeString(true); } -+ std::strncat(storage, tz_offset, sizeof(storage) - timestamp_len - 1); -+ return std::string(storage); -+} - - } // end namespace benchmark -diff --git a/lib/benchmark/tools/compare.py b/lib/benchmark/tools/compare.py -index 539ace6fb1..01d2c89f50 100755 ---- a/lib/benchmark/tools/compare.py -+++ b/lib/benchmark/tools/compare.py -@@ -7,6 +7,7 @@ compare.py - versatile benchmark output compare tool - - import argparse - from argparse import ArgumentParser -+import json - import sys - import gbench - from gbench import util, report -@@ -48,6 +49,20 @@ def create_parser(): - "of repetitions. Do note that only the display is affected. " - "Internally, all the actual runs are still used, e.g. for U test.") - -+ parser.add_argument( -+ '--no-color', -+ dest='color', -+ default=True, -+ action="store_false", -+ help="Do not use colors in the terminal output" -+ ) -+ -+ parser.add_argument( -+ '-d', -+ '--dump_to_json', -+ dest='dump_to_json', -+ help="Additionally, dump benchmark comparison output to this file in JSON format.") -+ - utest = parser.add_argument_group() - utest.add_argument( - '--no-utest', -@@ -223,10 +238,10 @@ def main(): - options_contender = ['--benchmark_filter=%s' % filter_contender] - - # Run the benchmarks and report the results -- json1 = json1_orig = gbench.util.run_or_load_benchmark( -- test_baseline, benchmark_options + options_baseline) -- json2 = json2_orig = gbench.util.run_or_load_benchmark( -- test_contender, benchmark_options + options_contender) -+ json1 = json1_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( -+ test_baseline, benchmark_options + options_baseline)) -+ json2 = json2_orig = gbench.util.sort_benchmark_results(gbench.util.run_or_load_benchmark( -+ test_contender, benchmark_options + options_contender)) - - # Now, filter the benchmarks so that the difference report can work - if filter_baseline and filter_contender: -@@ -236,14 +251,20 @@ def main(): - json2 = gbench.report.filter_benchmark( - json2_orig, filter_contender, replacement) - -- # Diff and output -- output_lines = gbench.report.generate_difference_report( -- json1, json2, args.display_aggregates_only, -- args.utest, args.utest_alpha) -+ diff_report = gbench.report.get_difference_report( -+ json1, json2, args.utest) -+ output_lines = gbench.report.print_difference_report( -+ diff_report, -+ args.display_aggregates_only, -+ args.utest, args.utest_alpha, args.color) - print(description) - for ln in output_lines: - print(ln) - -+ # Optionally, diff and output to JSON -+ if args.dump_to_json is not None: -+ with open(args.dump_to_json, 'w') as f_json: -+ json.dump(diff_report, f_json) - - class TestParser(unittest.TestCase): - def setUp(self): -diff --git a/lib/benchmark/tools/gbench/report.py b/lib/benchmark/tools/gbench/report.py -index 5bd3a8d85d..8203cbad02 100644 ---- a/lib/benchmark/tools/gbench/report.py -+++ b/lib/benchmark/tools/gbench/report.py -@@ -1,9 +1,11 @@ --import unittest - """report.py - Utilities for reporting statistics about benchmark results - """ -+ -+import unittest - import os - import re - import copy -+import random - - from scipy.stats import mannwhitneyu - -@@ -154,6 +156,7 @@ def extract_field(partition, field_name): - rhs = [x[field_name] for x in partition[1]] - return [lhs, rhs] - -+ - def calc_utest(timings_cpu, timings_time): - min_rep_cnt = min(len(timings_time[0]), - len(timings_time[1]), -@@ -171,46 +174,106 @@ def calc_utest(timings_cpu, timings_time): - - return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue - --def print_utest(partition, utest_alpha, first_col_width, use_color=True): -+def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True): - def get_utest_color(pval): - return BC_FAIL if pval >= utest_alpha else BC_OKGREEN - -- timings_time = extract_field(partition, 'real_time') -- timings_cpu = extract_field(partition, 'cpu_time') -- have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time) -- - # Check if we failed miserably with minimum required repetitions for utest -- if not have_optimal_repetitions and cpu_pvalue is None and time_pvalue is None: -+ if not utest['have_optimal_repetitions'] and utest['cpu_pvalue'] is None and utest['time_pvalue'] is None: - return [] - - dsc = "U Test, Repetitions: {} vs {}".format( -- len(timings_cpu[0]), len(timings_cpu[1])) -+ utest['nr_of_repetitions'], utest['nr_of_repetitions_other']) - dsc_color = BC_OKGREEN - - # We still got some results to show but issue a warning about it. -- if not have_optimal_repetitions: -+ if not utest['have_optimal_repetitions']: - dsc_color = BC_WARNING - dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format( - UTEST_OPTIMAL_REPETITIONS) - - special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}" - -- last_name = partition[0][0]['name'] - return [color_format(use_color, - special_str, - BC_HEADER, -- "{}{}".format(last_name, UTEST_COL_NAME), -+ "{}{}".format(bc_name, UTEST_COL_NAME), - first_col_width, -- get_utest_color(time_pvalue), time_pvalue, -- get_utest_color(cpu_pvalue), cpu_pvalue, -+ get_utest_color( -+ utest['time_pvalue']), utest['time_pvalue'], -+ get_utest_color( -+ utest['cpu_pvalue']), utest['cpu_pvalue'], - dsc_color, dsc, - endc=BC_ENDC)] - - --def generate_difference_report( -+def get_difference_report( - json1, - json2, -- display_aggregates_only=False, -+ utest=False): -+ """ -+ Calculate and report the difference between each test of two benchmarks -+ runs specified as 'json1' and 'json2'. Output is another json containing -+ relevant details for each test run. -+ """ -+ assert utest is True or utest is False -+ -+ diff_report = [] -+ partitions = partition_benchmarks(json1, json2) -+ for partition in partitions: -+ benchmark_name = partition[0][0]['name'] -+ time_unit = partition[0][0]['time_unit'] -+ measurements = [] -+ utest_results = {} -+ # Careful, we may have different repetition count. -+ for i in range(min(len(partition[0]), len(partition[1]))): -+ bn = partition[0][i] -+ other_bench = partition[1][i] -+ measurements.append({ -+ 'real_time': bn['real_time'], -+ 'cpu_time': bn['cpu_time'], -+ 'real_time_other': other_bench['real_time'], -+ 'cpu_time_other': other_bench['cpu_time'], -+ 'time': calculate_change(bn['real_time'], other_bench['real_time']), -+ 'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time']) -+ }) -+ -+ # After processing the whole partition, if requested, do the U test. -+ if utest: -+ timings_cpu = extract_field(partition, 'cpu_time') -+ timings_time = extract_field(partition, 'real_time') -+ have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time) -+ if cpu_pvalue and time_pvalue: -+ utest_results = { -+ 'have_optimal_repetitions': have_optimal_repetitions, -+ 'cpu_pvalue': cpu_pvalue, -+ 'time_pvalue': time_pvalue, -+ 'nr_of_repetitions': len(timings_cpu[0]), -+ 'nr_of_repetitions_other': len(timings_cpu[1]) -+ } -+ -+ # Store only if we had any measurements for given benchmark. -+ # E.g. partition_benchmarks will filter out the benchmarks having -+ # time units which are not compatible with other time units in the -+ # benchmark suite. -+ if measurements: -+ run_type = partition[0][0]['run_type'] if 'run_type' in partition[0][0] else '' -+ aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else '' -+ diff_report.append({ -+ 'name': benchmark_name, -+ 'measurements': measurements, -+ 'time_unit': time_unit, -+ 'run_type': run_type, -+ 'aggregate_name': aggregate_name, -+ 'utest': utest_results -+ }) -+ -+ return diff_report -+ -+ -+def print_difference_report( -+ json_diff_report, -+ include_aggregates_only=False, - utest=False, - utest_alpha=0.05, - use_color=True): -@@ -219,14 +282,16 @@ def generate_difference_report( - runs specified as 'json1' and 'json2'. - """ - assert utest is True or utest is False -- first_col_width = find_longest_name(json1['benchmarks']) - -- def find_test(name): -- for b in json2['benchmarks']: -- if b['name'] == name: -- return b -- return None -+ def get_color(res): -+ if res > 0.05: -+ return BC_FAIL -+ elif res > -0.07: -+ return BC_WHITE -+ else: -+ return BC_CYAN - -+ first_col_width = find_longest_name(json_diff_report) - first_col_width = max( - first_col_width, - len('Benchmark')) -@@ -235,50 +300,33 @@ def generate_difference_report( - 'Benchmark', 12 + first_col_width) - output_strs = [first_line, '-' * len(first_line)] - -- partitions = partition_benchmarks(json1, json2) -- for partition in partitions: -- # Careful, we may have different repetition count. -- for i in range(min(len(partition[0]), len(partition[1]))): -- bn = partition[0][i] -- other_bench = partition[1][i] -- -- # *If* we were asked to only display aggregates, -- # and if it is non-aggregate, then skip it. -- if display_aggregates_only and 'run_type' in bn and 'run_type' in other_bench: -- assert bn['run_type'] == other_bench['run_type'] -- if bn['run_type'] != 'aggregate': -- continue -- -- fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}" -- -- def get_color(res): -- if res > 0.05: -- return BC_FAIL -- elif res > -0.07: -- return BC_WHITE -- else: -- return BC_CYAN -- -- tres = calculate_change(bn['real_time'], other_bench['real_time']) -- cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time']) -- output_strs += [color_format(use_color, -- fmt_str, -- BC_HEADER, -- bn['name'], -- first_col_width, -- get_color(tres), -- tres, -- get_color(cpures), -- cpures, -- bn['real_time'], -- other_bench['real_time'], -- bn['cpu_time'], -- other_bench['cpu_time'], -- endc=BC_ENDC)] -- -- # After processing the whole partition, if requested, do the U test. -- if utest: -- output_strs += print_utest(partition, -+ fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}" -+ for benchmark in json_diff_report: -+ # *If* we were asked to only include aggregates, -+ # and if it is non-aggregate, then don't print it. -+ if not include_aggregates_only or not 'run_type' in benchmark or benchmark['run_type'] == 'aggregate': -+ for measurement in benchmark['measurements']: -+ output_strs += [color_format(use_color, -+ fmt_str, -+ BC_HEADER, -+ benchmark['name'], -+ first_col_width, -+ get_color(measurement['time']), -+ measurement['time'], -+ get_color(measurement['cpu']), -+ measurement['cpu'], -+ measurement['real_time'], -+ measurement['real_time_other'], -+ measurement['cpu_time'], -+ measurement['cpu_time_other'], -+ endc=BC_ENDC)] -+ -+ # After processing the measurements, if requested and -+ # if applicable (e.g. u-test exists for given benchmark), -+ # print the U test. -+ if utest and benchmark['utest']: -+ output_strs += print_utest(benchmark['name'], -+ benchmark['utest'], - utest_alpha=utest_alpha, - first_col_width=first_col_width, - use_color=use_color) -@@ -319,21 +367,26 @@ class TestGetUniqueBenchmarkNames(unittest.TestCase): - - - class TestReportDifference(unittest.TestCase): -- def load_results(self): -- import json -- testInputs = os.path.join( -- os.path.dirname( -- os.path.realpath(__file__)), -- 'Inputs') -- testOutput1 = os.path.join(testInputs, 'test1_run1.json') -- testOutput2 = os.path.join(testInputs, 'test1_run2.json') -- with open(testOutput1, 'r') as f: -- json1 = json.load(f) -- with open(testOutput2, 'r') as f: -- json2 = json.load(f) -- return json1, json2 -- -- def test_basic(self): -+ @classmethod -+ def setUpClass(cls): -+ def load_results(): -+ import json -+ testInputs = os.path.join( -+ os.path.dirname( -+ os.path.realpath(__file__)), -+ 'Inputs') -+ testOutput1 = os.path.join(testInputs, 'test1_run1.json') -+ testOutput2 = os.path.join(testInputs, 'test1_run2.json') -+ with open(testOutput1, 'r') as f: -+ json1 = json.load(f) -+ with open(testOutput2, 'r') as f: -+ json2 = json.load(f) -+ return json1, json2 -+ -+ json1, json2 = load_results() -+ cls.json_diff_report = get_difference_report(json1, json2) -+ -+ def test_json_diff_report_pretty_printing(self): - expect_lines = [ - ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'], - ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'], -@@ -351,9 +404,8 @@ class TestReportDifference(unittest.TestCase): - ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'], - ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'], - ] -- json1, json2 = self.load_results() -- output_lines_with_header = generate_difference_report( -- json1, json2, use_color=False) -+ output_lines_with_header = print_difference_report( -+ self.json_diff_report, use_color=False) - output_lines = output_lines_with_header[2:] - print("\n") - print("\n".join(output_lines_with_header)) -@@ -363,31 +415,118 @@ class TestReportDifference(unittest.TestCase): - self.assertEqual(len(parts), 7) - self.assertEqual(expect_lines[i], parts) - -+ def test_json_diff_report_output(self): -+ expected_output = [ -+ { -+ 'name': 'BM_SameTimes', -+ 'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': 'BM_2xFaster', -+ 'measurements': [{'time': -0.5000, 'cpu': -0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': 'BM_2xSlower', -+ 'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': 'BM_1PercentFaster', -+ 'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': 'BM_1PercentSlower', -+ 'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': 'BM_10PercentFaster', -+ 'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': 'BM_10PercentSlower', -+ 'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': 'BM_100xSlower', -+ 'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': 'BM_100xFaster', -+ 'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 'cpu_time': 10000, 'cpu_time_other': 100}], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': 'BM_10PercentCPUToTime', -+ 'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': 'BM_ThirdFaster', -+ 'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': 'BM_NotBadTimeUnit', -+ 'measurements': [{'time': -0.9000, 'cpu': 0.2000, 'real_time': 0.4, 'real_time_other': 0.04, 'cpu_time': 0.5, 'cpu_time_other': 0.6}], -+ 'time_unit': 's', -+ 'utest': {} -+ }, -+ ] -+ self.assertEqual(len(self.json_diff_report), len(expected_output)) -+ for out, expected in zip( -+ self.json_diff_report, expected_output): -+ self.assertEqual(out['name'], expected['name']) -+ self.assertEqual(out['time_unit'], expected['time_unit']) -+ assert_utest(self, out, expected) -+ assert_measurements(self, out, expected) -+ - - class TestReportDifferenceBetweenFamilies(unittest.TestCase): -- def load_result(self): -- import json -- testInputs = os.path.join( -- os.path.dirname( -- os.path.realpath(__file__)), -- 'Inputs') -- testOutput = os.path.join(testInputs, 'test2_run.json') -- with open(testOutput, 'r') as f: -- json = json.load(f) -- return json -+ @classmethod -+ def setUpClass(cls): -+ def load_result(): -+ import json -+ testInputs = os.path.join( -+ os.path.dirname( -+ os.path.realpath(__file__)), -+ 'Inputs') -+ testOutput = os.path.join(testInputs, 'test2_run.json') -+ with open(testOutput, 'r') as f: -+ json = json.load(f) -+ return json -+ -+ json = load_result() -+ json1 = filter_benchmark(json, "BM_Z.ro", ".") -+ json2 = filter_benchmark(json, "BM_O.e", ".") -+ cls.json_diff_report = get_difference_report(json1, json2) - -- def test_basic(self): -+ def test_json_diff_report_pretty_printing(self): - expect_lines = [ - ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'], - ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'], - ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'], - ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'], - ] -- json = self.load_result() -- json1 = filter_benchmark(json, "BM_Z.ro", ".") -- json2 = filter_benchmark(json, "BM_O.e", ".") -- output_lines_with_header = generate_difference_report( -- json1, json2, use_color=False) -+ output_lines_with_header = print_difference_report( -+ self.json_diff_report, use_color=False) - output_lines = output_lines_with_header[2:] - print("\n") - print("\n".join(output_lines_with_header)) -@@ -397,31 +536,71 @@ class TestReportDifferenceBetweenFamilies(unittest.TestCase): - self.assertEqual(len(parts), 7) - self.assertEqual(expect_lines[i], parts) - -+ def test_json_diff_report(self): -+ expected_output = [ -+ { -+ 'name': u'.', -+ 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': u'./4', -+ 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}], -+ 'time_unit': 'ns', -+ 'utest': {}, -+ }, -+ { -+ 'name': u'Prefix/.', -+ 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': u'Prefix/./3', -+ 'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}], -+ 'time_unit': 'ns', -+ 'utest': {} -+ } -+ ] -+ self.assertEqual(len(self.json_diff_report), len(expected_output)) -+ for out, expected in zip( -+ self.json_diff_report, expected_output): -+ self.assertEqual(out['name'], expected['name']) -+ self.assertEqual(out['time_unit'], expected['time_unit']) -+ assert_utest(self, out, expected) -+ assert_measurements(self, out, expected) -+ - - class TestReportDifferenceWithUTest(unittest.TestCase): -- def load_results(self): -- import json -- testInputs = os.path.join( -- os.path.dirname( -- os.path.realpath(__file__)), -- 'Inputs') -- testOutput1 = os.path.join(testInputs, 'test3_run0.json') -- testOutput2 = os.path.join(testInputs, 'test3_run1.json') -- with open(testOutput1, 'r') as f: -- json1 = json.load(f) -- with open(testOutput2, 'r') as f: -- json2 = json.load(f) -- return json1, json2 -- -- def test_utest(self): -- expect_lines = [] -+ @classmethod -+ def setUpClass(cls): -+ def load_results(): -+ import json -+ testInputs = os.path.join( -+ os.path.dirname( -+ os.path.realpath(__file__)), -+ 'Inputs') -+ testOutput1 = os.path.join(testInputs, 'test3_run0.json') -+ testOutput2 = os.path.join(testInputs, 'test3_run1.json') -+ with open(testOutput1, 'r') as f: -+ json1 = json.load(f) -+ with open(testOutput2, 'r') as f: -+ json2 = json.load(f) -+ return json1, json2 -+ -+ json1, json2 = load_results() -+ cls.json_diff_report = get_difference_report( -+ json1, json2, utest=True) -+ -+ def test_json_diff_report_pretty_printing(self): - expect_lines = [ - ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], - ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], - ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], - ['BM_Two_pvalue', -- '0.6985', -- '0.6985', -+ '1.0000', -+ '0.6667', - 'U', - 'Test,', - 'Repetitions:', -@@ -438,7 +617,7 @@ class TestReportDifferenceWithUTest(unittest.TestCase): - ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], - ['short_pvalue', - '0.7671', -- '0.1489', -+ '0.2000', - 'U', - 'Test,', - 'Repetitions:', -@@ -453,9 +632,54 @@ class TestReportDifferenceWithUTest(unittest.TestCase): - 'recommended.'], - ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'], - ] -- json1, json2 = self.load_results() -- output_lines_with_header = generate_difference_report( -- json1, json2, utest=True, utest_alpha=0.05, use_color=False) -+ output_lines_with_header = print_difference_report( -+ self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False) -+ output_lines = output_lines_with_header[2:] -+ print("\n") -+ print("\n".join(output_lines_with_header)) -+ self.assertEqual(len(output_lines), len(expect_lines)) -+ for i in range(0, len(output_lines)): -+ parts = [x for x in output_lines[i].split(' ') if x] -+ self.assertEqual(expect_lines[i], parts) -+ -+ def test_json_diff_report_pretty_printing_aggregates_only(self): -+ expect_lines = [ -+ ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], -+ ['BM_Two_pvalue', -+ '1.0000', -+ '0.6667', -+ 'U', -+ 'Test,', -+ 'Repetitions:', -+ '2', -+ 'vs', -+ '2.', -+ 'WARNING:', -+ 'Results', -+ 'unreliable!', -+ '9+', -+ 'repetitions', -+ 'recommended.'], -+ ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'], -+ ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], -+ ['short_pvalue', -+ '0.7671', -+ '0.2000', -+ 'U', -+ 'Test,', -+ 'Repetitions:', -+ '2', -+ 'vs', -+ '3.', -+ 'WARNING:', -+ 'Results', -+ 'unreliable!', -+ '9+', -+ 'repetitions', -+ 'recommended.'], -+ ] -+ output_lines_with_header = print_difference_report( -+ self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False) - output_lines = output_lines_with_header[2:] - print("\n") - print("\n".join(output_lines_with_header)) -@@ -464,32 +688,112 @@ class TestReportDifferenceWithUTest(unittest.TestCase): - parts = [x for x in output_lines[i].split(' ') if x] - self.assertEqual(expect_lines[i], parts) - -+ def test_json_diff_report(self): -+ expected_output = [ -+ { -+ 'name': u'BM_One', -+ 'measurements': [ -+ {'time': -0.1, -+ 'cpu': 0.1, -+ 'real_time': 10, -+ 'real_time_other': 9, -+ 'cpu_time': 100, -+ 'cpu_time_other': 110} -+ ], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': u'BM_Two', -+ 'measurements': [ -+ {'time': 0.1111111111111111, -+ 'cpu': -0.011111111111111112, -+ 'real_time': 9, -+ 'real_time_other': 10, -+ 'cpu_time': 90, -+ 'cpu_time_other': 89}, -+ {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, -+ 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} -+ ], -+ 'time_unit': 'ns', -+ 'utest': { -+ 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0 -+ } -+ }, -+ { -+ 'name': u'short', -+ 'measurements': [ -+ {'time': -0.125, -+ 'cpu': -0.0625, -+ 'real_time': 8, -+ 'real_time_other': 7, -+ 'cpu_time': 80, -+ 'cpu_time_other': 75}, -+ {'time': -0.4325, -+ 'cpu': -0.13506493506493514, -+ 'real_time': 8, -+ 'real_time_other': 4.54, -+ 'cpu_time': 77, -+ 'cpu_time_other': 66.6} -+ ], -+ 'time_unit': 'ns', -+ 'utest': { -+ 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772 -+ } -+ }, -+ { -+ 'name': u'medium', -+ 'measurements': [ -+ {'time': -0.375, -+ 'cpu': -0.3375, -+ 'real_time': 8, -+ 'real_time_other': 5, -+ 'cpu_time': 80, -+ 'cpu_time_other': 53} -+ ], -+ 'time_unit': 'ns', -+ 'utest': {} -+ } -+ ] -+ self.assertEqual(len(self.json_diff_report), len(expected_output)) -+ for out, expected in zip( -+ self.json_diff_report, expected_output): -+ self.assertEqual(out['name'], expected['name']) -+ self.assertEqual(out['time_unit'], expected['time_unit']) -+ assert_utest(self, out, expected) -+ assert_measurements(self, out, expected) -+ - - class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( - unittest.TestCase): -- def load_results(self): -- import json -- testInputs = os.path.join( -- os.path.dirname( -- os.path.realpath(__file__)), -- 'Inputs') -- testOutput1 = os.path.join(testInputs, 'test3_run0.json') -- testOutput2 = os.path.join(testInputs, 'test3_run1.json') -- with open(testOutput1, 'r') as f: -- json1 = json.load(f) -- with open(testOutput2, 'r') as f: -- json2 = json.load(f) -- return json1, json2 -- -- def test_utest(self): -- expect_lines = [] -+ @classmethod -+ def setUpClass(cls): -+ def load_results(): -+ import json -+ testInputs = os.path.join( -+ os.path.dirname( -+ os.path.realpath(__file__)), -+ 'Inputs') -+ testOutput1 = os.path.join(testInputs, 'test3_run0.json') -+ testOutput2 = os.path.join(testInputs, 'test3_run1.json') -+ with open(testOutput1, 'r') as f: -+ json1 = json.load(f) -+ with open(testOutput2, 'r') as f: -+ json2 = json.load(f) -+ return json1, json2 -+ -+ json1, json2 = load_results() -+ cls.json_diff_report = get_difference_report( -+ json1, json2, utest=True) -+ -+ def test_json_diff_report_pretty_printing(self): - expect_lines = [ - ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'], - ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'], - ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'], - ['BM_Two_pvalue', -- '0.6985', -- '0.6985', -+ '1.0000', -+ '0.6667', - 'U', - 'Test,', - 'Repetitions:', -@@ -506,7 +810,7 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( - ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'], - ['short_pvalue', - '0.7671', -- '0.1489', -+ '0.2000', - 'U', - 'Test,', - 'Repetitions:', -@@ -519,10 +823,126 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( - '9+', - 'repetitions', - 'recommended.'], -+ ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'] -+ ] -+ output_lines_with_header = print_difference_report( -+ self.json_diff_report, -+ utest=True, utest_alpha=0.05, use_color=False) -+ output_lines = output_lines_with_header[2:] -+ print("\n") -+ print("\n".join(output_lines_with_header)) -+ self.assertEqual(len(output_lines), len(expect_lines)) -+ for i in range(0, len(output_lines)): -+ parts = [x for x in output_lines[i].split(' ') if x] -+ self.assertEqual(expect_lines[i], parts) -+ -+ def test_json_diff_report(self): -+ expected_output = [ -+ { -+ 'name': u'BM_One', -+ 'measurements': [ -+ {'time': -0.1, -+ 'cpu': 0.1, -+ 'real_time': 10, -+ 'real_time_other': 9, -+ 'cpu_time': 100, -+ 'cpu_time_other': 110} -+ ], -+ 'time_unit': 'ns', -+ 'utest': {} -+ }, -+ { -+ 'name': u'BM_Two', -+ 'measurements': [ -+ {'time': 0.1111111111111111, -+ 'cpu': -0.011111111111111112, -+ 'real_time': 9, -+ 'real_time_other': 10, -+ 'cpu_time': 90, -+ 'cpu_time_other': 89}, -+ {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8, -+ 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72} -+ ], -+ 'time_unit': 'ns', -+ 'utest': { -+ 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0 -+ } -+ }, -+ { -+ 'name': u'short', -+ 'measurements': [ -+ {'time': -0.125, -+ 'cpu': -0.0625, -+ 'real_time': 8, -+ 'real_time_other': 7, -+ 'cpu_time': 80, -+ 'cpu_time_other': 75}, -+ {'time': -0.4325, -+ 'cpu': -0.13506493506493514, -+ 'real_time': 8, -+ 'real_time_other': 4.54, -+ 'cpu_time': 77, -+ 'cpu_time_other': 66.6} -+ ], -+ 'time_unit': 'ns', -+ 'utest': { -+ 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772 -+ } -+ }, -+ { -+ 'name': u'medium', -+ 'measurements': [ -+ {'real_time_other': 5, -+ 'cpu_time': 80, -+ 'time': -0.375, -+ 'real_time': 8, -+ 'cpu_time_other': 53, -+ 'cpu': -0.3375 -+ } -+ ], -+ 'utest': {}, -+ 'time_unit': u'ns', -+ 'aggregate_name': '' -+ } -+ ] -+ self.assertEqual(len(self.json_diff_report), len(expected_output)) -+ for out, expected in zip( -+ self.json_diff_report, expected_output): -+ self.assertEqual(out['name'], expected['name']) -+ self.assertEqual(out['time_unit'], expected['time_unit']) -+ assert_utest(self, out, expected) -+ assert_measurements(self, out, expected) -+ -+ -+ -+class TestReportDifferenceForPercentageAggregates( -+ unittest.TestCase): -+ @classmethod -+ def setUpClass(cls): -+ def load_results(): -+ import json -+ testInputs = os.path.join( -+ os.path.dirname( -+ os.path.realpath(__file__)), -+ 'Inputs') -+ testOutput1 = os.path.join(testInputs, 'test4_run0.json') -+ testOutput2 = os.path.join(testInputs, 'test4_run1.json') -+ with open(testOutput1, 'r') as f: -+ json1 = json.load(f) -+ with open(testOutput2, 'r') as f: -+ json2 = json.load(f) -+ return json1, json2 -+ -+ json1, json2 = load_results() -+ cls.json_diff_report = get_difference_report( -+ json1, json2, utest=True) -+ -+ def test_json_diff_report_pretty_printing(self): -+ expect_lines = [ -+ ['whocares', '-0.5000', '+0.5000', '0', '0', '0', '0'] - ] -- json1, json2 = self.load_results() -- output_lines_with_header = generate_difference_report( -- json1, json2, display_aggregates_only=True, -+ output_lines_with_header = print_difference_report( -+ self.json_diff_report, - utest=True, utest_alpha=0.05, use_color=False) - output_lines = output_lines_with_header[2:] - print("\n") -@@ -532,6 +952,99 @@ class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly( - parts = [x for x in output_lines[i].split(' ') if x] - self.assertEqual(expect_lines[i], parts) - -+ def test_json_diff_report(self): -+ expected_output = [ -+ { -+ 'name': u'whocares', -+ 'measurements': [ -+ {'time': -0.5, -+ 'cpu': 0.5, -+ 'real_time': 0.01, -+ 'real_time_other': 0.005, -+ 'cpu_time': 0.10, -+ 'cpu_time_other': 0.15} -+ ], -+ 'time_unit': 'ns', -+ 'utest': {} -+ } -+ ] -+ self.assertEqual(len(self.json_diff_report), len(expected_output)) -+ for out, expected in zip( -+ self.json_diff_report, expected_output): -+ self.assertEqual(out['name'], expected['name']) -+ self.assertEqual(out['time_unit'], expected['time_unit']) -+ assert_utest(self, out, expected) -+ assert_measurements(self, out, expected) -+ -+ -+class TestReportSorting(unittest.TestCase): -+ @classmethod -+ def setUpClass(cls): -+ def load_result(): -+ import json -+ testInputs = os.path.join( -+ os.path.dirname( -+ os.path.realpath(__file__)), -+ 'Inputs') -+ testOutput = os.path.join(testInputs, 'test4_run.json') -+ with open(testOutput, 'r') as f: -+ json = json.load(f) -+ return json -+ -+ cls.json = load_result() -+ -+ def test_json_diff_report_pretty_printing(self): -+ import util -+ -+ expected_names = [ -+ "99 family 0 instance 0 repetition 0", -+ "98 family 0 instance 0 repetition 1", -+ "97 family 0 instance 0 aggregate", -+ "96 family 0 instance 1 repetition 0", -+ "95 family 0 instance 1 repetition 1", -+ "94 family 0 instance 1 aggregate", -+ "93 family 1 instance 0 repetition 0", -+ "92 family 1 instance 0 repetition 1", -+ "91 family 1 instance 0 aggregate", -+ "90 family 1 instance 1 repetition 0", -+ "89 family 1 instance 1 repetition 1", -+ "88 family 1 instance 1 aggregate" -+ ] -+ -+ for n in range(len(self.json['benchmarks']) ** 2): -+ random.shuffle(self.json['benchmarks']) -+ sorted_benchmarks = util.sort_benchmark_results(self.json)[ -+ 'benchmarks'] -+ self.assertEqual(len(expected_names), len(sorted_benchmarks)) -+ for out, expected in zip(sorted_benchmarks, expected_names): -+ self.assertEqual(out['name'], expected) -+ -+ -+def assert_utest(unittest_instance, lhs, rhs): -+ if lhs['utest']: -+ unittest_instance.assertAlmostEqual( -+ lhs['utest']['cpu_pvalue'], -+ rhs['utest']['cpu_pvalue']) -+ unittest_instance.assertAlmostEqual( -+ lhs['utest']['time_pvalue'], -+ rhs['utest']['time_pvalue']) -+ unittest_instance.assertEqual( -+ lhs['utest']['have_optimal_repetitions'], -+ rhs['utest']['have_optimal_repetitions']) -+ else: -+ # lhs is empty. assert if rhs is not. -+ unittest_instance.assertEqual(lhs['utest'], rhs['utest']) -+ -+ -+def assert_measurements(unittest_instance, lhs, rhs): -+ for m1, m2 in zip(lhs['measurements'], rhs['measurements']): -+ unittest_instance.assertEqual(m1['real_time'], m2['real_time']) -+ unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time']) -+ # m1['time'] and m1['cpu'] hold values which are being calculated, -+ # and therefore we must use almost-equal pattern. -+ unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4) -+ unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4) -+ - - if __name__ == '__main__': - unittest.main() -diff --git a/lib/benchmark/tools/gbench/util.py b/lib/benchmark/tools/gbench/util.py -index 1f8e8e2c47..5d0012c0cb 100644 ---- a/lib/benchmark/tools/gbench/util.py -+++ b/lib/benchmark/tools/gbench/util.py -@@ -5,6 +5,7 @@ import os - import tempfile - import subprocess - import sys -+import functools - - # Input file type enumeration - IT_Invalid = 0 -@@ -119,6 +120,23 @@ def load_benchmark_results(fname): - return json.load(f) - - -+def sort_benchmark_results(result): -+ benchmarks = result['benchmarks'] -+ -+ # From inner key to the outer key! -+ benchmarks = sorted( -+ benchmarks, key=lambda benchmark: benchmark['repetition_index'] if 'repetition_index' in benchmark else -1) -+ benchmarks = sorted( -+ benchmarks, key=lambda benchmark: 1 if 'run_type' in benchmark and benchmark['run_type'] == "aggregate" else 0) -+ benchmarks = sorted( -+ benchmarks, key=lambda benchmark: benchmark['per_family_instance_index'] if 'per_family_instance_index' in benchmark else -1) -+ benchmarks = sorted( -+ benchmarks, key=lambda benchmark: benchmark['family_index'] if 'family_index' in benchmark else -1) -+ -+ result['benchmarks'] = benchmarks -+ return result -+ -+ - def run_benchmark(exe_name, benchmark_flags): - """ - Run a benchmark specified by 'exe_name' with the specified -@@ -158,7 +176,6 @@ def run_or_load_benchmark(filename, benchmark_flags): - ftype = check_input_file(filename) - if ftype == IT_JSON: - return load_benchmark_results(filename) -- elif ftype == IT_Executable: -+ if ftype == IT_Executable: - return run_benchmark(filename, benchmark_flags) -- else: -- assert False # This branch is unreachable -+ raise ValueError('Unknown file type %s' % ftype) --- -2.31.1 -