mame: add further micro optimizations
Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
This commit is contained in:
@@ -0,0 +1,78 @@
|
||||
From f2e16fa98a94ed51d04540e7b53ffdf0bc7b5b57 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@gmail.com>
|
||||
Date: Sat, 2 Mar 2019 23:13:27 +0100
|
||||
Subject: [PATCH] OSD/OpenGl: Improve performance by moving calculations out of
|
||||
loop
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This one was triggered by missile.
|
||||
|
||||
Test case:
|
||||
mame64 -nothrottle missile
|
||||
|
||||
Before: Average speed: 312.49% (58 seconds)
|
||||
After: Average speed: 319.34% (58 seconds)
|
||||
|
||||
I am aware that missile tests copyline_palette16() only.
|
||||
|
||||
Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
|
||||
---
|
||||
src/osd/modules/render/drawogl.cpp | 14 ++++++++------
|
||||
1 file changed, 8 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/src/osd/modules/render/drawogl.cpp b/src/osd/modules/render/drawogl.cpp
|
||||
index 98db62fdf1..045dc16954 100644
|
||||
--- a/src/osd/modules/render/drawogl.cpp
|
||||
+++ b/src/osd/modules/render/drawogl.cpp
|
||||
@@ -2055,9 +2055,9 @@ static inline void copyline_palette16(uint32_t *dst, const uint16_t *src, int wi
|
||||
*dst++ = 0xff000000 | palette[*src];
|
||||
for (x = 0; x < width; x++)
|
||||
{
|
||||
- int srcpix = *src++;
|
||||
+ uint32_t palval = palette[*src++];
|
||||
for (int x2 = 0; x2 < xprescale; x2++)
|
||||
- *dst++ = 0xff000000 | palette[srcpix];
|
||||
+ *dst++ = 0xff000000 | palval;
|
||||
}
|
||||
if (xborderpix)
|
||||
*dst++ = 0xff000000 | palette[*--src];
|
||||
@@ -2078,9 +2078,9 @@ static inline void copyline_palettea16(uint32_t *dst, const uint16_t *src, int w
|
||||
*dst++ = palette[*src];
|
||||
for (x = 0; x < width; x++)
|
||||
{
|
||||
- int srcpix = *src++;
|
||||
+ uint32_t palval = palette[*src++];
|
||||
for (int x2 = 0; x2 < xprescale; x2++)
|
||||
- *dst++ = palette[srcpix];
|
||||
+ *dst++ = palval;
|
||||
}
|
||||
if (xborderpix)
|
||||
*dst++ = palette[*--src];
|
||||
@@ -2109,9 +2109,10 @@ static inline void copyline_rgb32(uint32_t *dst, const uint32_t *src, int width,
|
||||
for (x = 0; x < width; x++)
|
||||
{
|
||||
rgb_t srcpix = *src++;
|
||||
+ uint32_t palval = palette[0x200 + srcpix.r()] | palette[0x100 + srcpix.g()] | palette[srcpix.b()];
|
||||
for (int x2 = 0; x2 < xprescale; x2++)
|
||||
{
|
||||
- *dst++ = 0xff000000 | palette[0x200 + srcpix.r()] | palette[0x100 + srcpix.g()] | palette[srcpix.b()];
|
||||
+ *dst++ = 0xff000000 | palval;
|
||||
}
|
||||
}
|
||||
if (xborderpix)
|
||||
@@ -2161,8 +2162,9 @@ static inline void copyline_argb32(uint32_t *dst, const uint32_t *src, int width
|
||||
for (x = 0; x < width; x++)
|
||||
{
|
||||
rgb_t srcpix = *src++;
|
||||
+ uint32_t palval = palette[0x200 + srcpix.r()] | palette[0x100 + srcpix.g()] | palette[srcpix.b()];
|
||||
for (int x2 = 0; x2 < xprescale; x2++)
|
||||
- *dst++ = (srcpix & 0xff000000) | palette[0x200 + srcpix.r()] | palette[0x100 + srcpix.g()] | palette[srcpix.b()];
|
||||
+ *dst++ = (srcpix & 0xff000000) | palval;
|
||||
}
|
||||
if (xborderpix)
|
||||
{
|
||||
--
|
||||
2.20.1
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
From 0ba9c56e7a27bd926270678e7add074fb00204f8 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@gmail.com>
|
||||
Date: Sun, 3 Mar 2019 15:00:41 +0100
|
||||
Subject: [PATCH] pokey: Make step_one_clock inline
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This gives another small performance win on machines using multiple pokeys. Since
|
||||
it ruins our valgrind output and upstream will not be keen on it:
|
||||
|
||||
Upstream-Status: Pending
|
||||
|
||||
Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
|
||||
---
|
||||
src/devices/sound/pokey.cpp | 2 +-
|
||||
src/devices/sound/pokey.h | 2 +-
|
||||
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/devices/sound/pokey.cpp b/src/devices/sound/pokey.cpp
|
||||
index 03ea12c7b5..80b9f64d6a 100644
|
||||
--- a/src/devices/sound/pokey.cpp
|
||||
+++ b/src/devices/sound/pokey.cpp
|
||||
@@ -564,7 +564,7 @@ void pokey_device::step_pot()
|
||||
*
|
||||
*/
|
||||
|
||||
-void pokey_device::step_one_clock(void)
|
||||
+inline void pokey_device::step_one_clock(void)
|
||||
{
|
||||
int const base_clock = (m_AUDCTL & CLK_15KHZ) ? CLK_114 : CLK_28;
|
||||
|
||||
diff --git a/src/devices/sound/pokey.h b/src/devices/sound/pokey.h
|
||||
index 7e63e89e54..48858bc743 100644
|
||||
--- a/src/devices/sound/pokey.h
|
||||
+++ b/src/devices/sound/pokey.h
|
||||
@@ -264,7 +264,7 @@ private:
|
||||
|
||||
static constexpr int POKEY_CHANNELS = 4;
|
||||
|
||||
- void step_one_clock();
|
||||
+ inline void step_one_clock();
|
||||
void step_keyboard();
|
||||
void step_pot();
|
||||
|
||||
--
|
||||
2.20.1
|
||||
|
||||
@@ -9,6 +9,8 @@ SRC_URI = " \
|
||||
file://0001-pokey-performance-optimization-by-not-using-modulus.patch \
|
||||
file://0002-pokey-rename-pokey_device-m_output-pokey_device-m_ou.patch \
|
||||
file://0003-pokey-rework-for-performance-enhancements.patch \
|
||||
file://0004-OSD-OpenGl-Improve-performance-by-moving-calculation.patch \
|
||||
file://no-upstream/0001-pokey-Make-step_one_clock-inline.patch \
|
||||
file://mame.desktop \
|
||||
"
|
||||
SRC_URI[md5sum] = "7a368efb80c228258d1928ed74bbc7a4"
|
||||
@@ -61,6 +63,7 @@ EXTRA_OEMAKE = " \
|
||||
CROSS_BUILD=1 \
|
||||
OVERRIDE_CC='${CC} ${MAME_PTR64}' \
|
||||
OVERRIDE_CXX='${CXX} ${MAME_PTR64}' \
|
||||
OPTIMIZE=3 \
|
||||
TOOLS=1 \
|
||||
USE_QTDEBUG=0 \
|
||||
LTO=0 \
|
||||
@@ -79,8 +82,6 @@ EXTRA_OEMAKE = " \
|
||||
SDL_INI_PATH=${sysconfdir}/${BPN} \
|
||||
"
|
||||
|
||||
CFLAGS += "-O3"
|
||||
|
||||
do_compile_prepend() {
|
||||
# seems there is some race. Build complains
|
||||
# | Assembler messages:
|
||||
|
||||
Reference in New Issue
Block a user