mame: add further micro optimizations

Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
This commit is contained in:
Andreas Müller
2019-03-03 17:46:50 +01:00
parent 9efa55d805
commit 279d2bfd32
3 changed files with 129 additions and 2 deletions

View File

@@ -0,0 +1,78 @@
From f2e16fa98a94ed51d04540e7b53ffdf0bc7b5b57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@gmail.com>
Date: Sat, 2 Mar 2019 23:13:27 +0100
Subject: [PATCH] OSD/OpenGl: Improve performance by moving calculations out of
loop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This one was triggered by missile.
Test case:
mame64 -nothrottle missile
Before: Average speed: 312.49% (58 seconds)
After: Average speed: 319.34% (58 seconds)
I am aware that missile tests copyline_palette16() only.
Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
---
src/osd/modules/render/drawogl.cpp | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/src/osd/modules/render/drawogl.cpp b/src/osd/modules/render/drawogl.cpp
index 98db62fdf1..045dc16954 100644
--- a/src/osd/modules/render/drawogl.cpp
+++ b/src/osd/modules/render/drawogl.cpp
@@ -2055,9 +2055,9 @@ static inline void copyline_palette16(uint32_t *dst, const uint16_t *src, int wi
*dst++ = 0xff000000 | palette[*src];
for (x = 0; x < width; x++)
{
- int srcpix = *src++;
+ uint32_t palval = palette[*src++];
for (int x2 = 0; x2 < xprescale; x2++)
- *dst++ = 0xff000000 | palette[srcpix];
+ *dst++ = 0xff000000 | palval;
}
if (xborderpix)
*dst++ = 0xff000000 | palette[*--src];
@@ -2078,9 +2078,9 @@ static inline void copyline_palettea16(uint32_t *dst, const uint16_t *src, int w
*dst++ = palette[*src];
for (x = 0; x < width; x++)
{
- int srcpix = *src++;
+ uint32_t palval = palette[*src++];
for (int x2 = 0; x2 < xprescale; x2++)
- *dst++ = palette[srcpix];
+ *dst++ = palval;
}
if (xborderpix)
*dst++ = palette[*--src];
@@ -2109,9 +2109,10 @@ static inline void copyline_rgb32(uint32_t *dst, const uint32_t *src, int width,
for (x = 0; x < width; x++)
{
rgb_t srcpix = *src++;
+ uint32_t palval = palette[0x200 + srcpix.r()] | palette[0x100 + srcpix.g()] | palette[srcpix.b()];
for (int x2 = 0; x2 < xprescale; x2++)
{
- *dst++ = 0xff000000 | palette[0x200 + srcpix.r()] | palette[0x100 + srcpix.g()] | palette[srcpix.b()];
+ *dst++ = 0xff000000 | palval;
}
}
if (xborderpix)
@@ -2161,8 +2162,9 @@ static inline void copyline_argb32(uint32_t *dst, const uint32_t *src, int width
for (x = 0; x < width; x++)
{
rgb_t srcpix = *src++;
+ uint32_t palval = palette[0x200 + srcpix.r()] | palette[0x100 + srcpix.g()] | palette[srcpix.b()];
for (int x2 = 0; x2 < xprescale; x2++)
- *dst++ = (srcpix & 0xff000000) | palette[0x200 + srcpix.r()] | palette[0x100 + srcpix.g()] | palette[srcpix.b()];
+ *dst++ = (srcpix & 0xff000000) | palval;
}
if (xborderpix)
{
--
2.20.1

View File

@@ -0,0 +1,48 @@
From 0ba9c56e7a27bd926270678e7add074fb00204f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@gmail.com>
Date: Sun, 3 Mar 2019 15:00:41 +0100
Subject: [PATCH] pokey: Make step_one_clock inline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This gives another small performance win on machines using multiple pokeys. Since
it ruins our valgrind output and upstream will not be keen on it:
Upstream-Status: Pending
Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
---
src/devices/sound/pokey.cpp | 2 +-
src/devices/sound/pokey.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/devices/sound/pokey.cpp b/src/devices/sound/pokey.cpp
index 03ea12c7b5..80b9f64d6a 100644
--- a/src/devices/sound/pokey.cpp
+++ b/src/devices/sound/pokey.cpp
@@ -564,7 +564,7 @@ void pokey_device::step_pot()
*
*/
-void pokey_device::step_one_clock(void)
+inline void pokey_device::step_one_clock(void)
{
int const base_clock = (m_AUDCTL & CLK_15KHZ) ? CLK_114 : CLK_28;
diff --git a/src/devices/sound/pokey.h b/src/devices/sound/pokey.h
index 7e63e89e54..48858bc743 100644
--- a/src/devices/sound/pokey.h
+++ b/src/devices/sound/pokey.h
@@ -264,7 +264,7 @@ private:
static constexpr int POKEY_CHANNELS = 4;
- void step_one_clock();
+ inline void step_one_clock();
void step_keyboard();
void step_pot();
--
2.20.1

View File

@@ -9,6 +9,8 @@ SRC_URI = " \
file://0001-pokey-performance-optimization-by-not-using-modulus.patch \
file://0002-pokey-rename-pokey_device-m_output-pokey_device-m_ou.patch \
file://0003-pokey-rework-for-performance-enhancements.patch \
file://0004-OSD-OpenGl-Improve-performance-by-moving-calculation.patch \
file://no-upstream/0001-pokey-Make-step_one_clock-inline.patch \
file://mame.desktop \
"
SRC_URI[md5sum] = "7a368efb80c228258d1928ed74bbc7a4"
@@ -61,6 +63,7 @@ EXTRA_OEMAKE = " \
CROSS_BUILD=1 \
OVERRIDE_CC='${CC} ${MAME_PTR64}' \
OVERRIDE_CXX='${CXX} ${MAME_PTR64}' \
OPTIMIZE=3 \
TOOLS=1 \
USE_QTDEBUG=0 \
LTO=0 \
@@ -79,8 +82,6 @@ EXTRA_OEMAKE = " \
SDL_INI_PATH=${sysconfdir}/${BPN} \
"
CFLAGS += "-O3"
do_compile_prepend() {
# seems there is some race. Build complains
# | Assembler messages: