mirror of
https://git.yoctoproject.org/poky
synced 2026-05-04 13:39:49 +02:00
bitbake: hashserv: Add Unihash Garbage Collection
Adds support for removing unused unihashes from the database. This is done using a "mark and sweep" style of garbage collection where a collection is started by marking which unihashes should be kept in the database, then performing a sweep to remove any unmarked hashes. (Bitbake rev: 433d4a075a1acfbd2a2913061739353a84bb01ed) Signed-off-by: Joshua Watt <JPEWhacker@gmail.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
committed by
Richard Purdie
parent
324c9fd666
commit
1effd1014d
@@ -15,6 +15,7 @@ UNIHASH_TABLE_DEFINITION = (
|
||||
("method", "TEXT NOT NULL", "UNIQUE"),
|
||||
("taskhash", "TEXT NOT NULL", "UNIQUE"),
|
||||
("unihash", "TEXT NOT NULL", ""),
|
||||
("gc_mark", "TEXT NOT NULL", ""),
|
||||
)
|
||||
|
||||
UNIHASH_TABLE_COLUMNS = tuple(name for name, _, _ in UNIHASH_TABLE_DEFINITION)
|
||||
@@ -44,6 +45,14 @@ USERS_TABLE_DEFINITION = (
|
||||
USERS_TABLE_COLUMNS = tuple(name for name, _, _ in USERS_TABLE_DEFINITION)
|
||||
|
||||
|
||||
# Schema of the "config" table, one 3-tuple per column. The elements look like
# (column name, SQL type, extra constraint) -- presumably consumed by
# _make_table(); confirm against that helper.
CONFIG_TABLE_DEFINITION = (
    ("name", "TEXT NOT NULL", "UNIQUE"),
    ("value", "TEXT", ""),
)

# Column names only (first element of each entry), in definition order.
CONFIG_TABLE_COLUMNS = tuple(column[0] for column in CONFIG_TABLE_DEFINITION)
|
||||
|
||||
|
||||
def _make_table(cursor, name, definition):
|
||||
cursor.execute(
|
||||
"""
|
||||
@@ -71,6 +80,35 @@ def map_user(row):
|
||||
)
|
||||
|
||||
|
||||
def _make_condition_statement(columns, condition):
|
||||
where = {}
|
||||
for c in columns:
|
||||
if c in condition and condition[c] is not None:
|
||||
where[c] = condition[c]
|
||||
|
||||
return where, " AND ".join("%s=:%s" % (k, k) for k in where.keys())
|
||||
|
||||
|
||||
def _get_sqlite_version(cursor):
|
||||
cursor.execute("SELECT sqlite_version()")
|
||||
|
||||
version = []
|
||||
for v in cursor.fetchone()[0].split("."):
|
||||
try:
|
||||
version.append(int(v))
|
||||
except ValueError:
|
||||
version.append(v)
|
||||
|
||||
return tuple(version)
|
||||
|
||||
|
||||
def _schema_table_name(version):
|
||||
if version >= (3, 33):
|
||||
return "sqlite_schema"
|
||||
|
||||
return "sqlite_master"
|
||||
|
||||
|
||||
class DatabaseEngine(object):
|
||||
def __init__(self, dbname, sync):
|
||||
self.dbname = dbname
|
||||
@@ -82,9 +120,10 @@ class DatabaseEngine(object):
|
||||
db.row_factory = sqlite3.Row
|
||||
|
||||
with closing(db.cursor()) as cursor:
|
||||
_make_table(cursor, "unihashes_v2", UNIHASH_TABLE_DEFINITION)
|
||||
_make_table(cursor, "unihashes_v3", UNIHASH_TABLE_DEFINITION)
|
||||
_make_table(cursor, "outhashes_v2", OUTHASH_TABLE_DEFINITION)
|
||||
_make_table(cursor, "users", USERS_TABLE_DEFINITION)
|
||||
_make_table(cursor, "config", CONFIG_TABLE_DEFINITION)
|
||||
|
||||
cursor.execute("PRAGMA journal_mode = WAL")
|
||||
cursor.execute(
|
||||
@@ -96,17 +135,38 @@ class DatabaseEngine(object):
|
||||
cursor.execute("DROP INDEX IF EXISTS outhash_lookup")
|
||||
cursor.execute("DROP INDEX IF EXISTS taskhash_lookup_v2")
|
||||
cursor.execute("DROP INDEX IF EXISTS outhash_lookup_v2")
|
||||
cursor.execute("DROP INDEX IF EXISTS taskhash_lookup_v3")
|
||||
|
||||
# TODO: Upgrade from tasks_v2?
|
||||
cursor.execute("DROP TABLE IF EXISTS tasks_v2")
|
||||
|
||||
# Create new indexes
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS taskhash_lookup_v3 ON unihashes_v2 (method, taskhash)"
|
||||
"CREATE INDEX IF NOT EXISTS taskhash_lookup_v4 ON unihashes_v3 (method, taskhash)"
|
||||
)
|
||||
cursor.execute(
|
||||
"CREATE INDEX IF NOT EXISTS outhash_lookup_v3 ON outhashes_v2 (method, outhash)"
|
||||
)
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS config_lookup ON config (name)")
|
||||
|
||||
sqlite_version = _get_sqlite_version(cursor)
|
||||
|
||||
cursor.execute(
|
||||
f"""
|
||||
SELECT name FROM {_schema_table_name(sqlite_version)} WHERE type = 'table' AND name = 'unihashes_v2'
|
||||
"""
|
||||
)
|
||||
if cursor.fetchone():
|
||||
self.logger.info("Upgrading Unihashes V2 -> V3...")
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT INTO unihashes_v3 (id, method, unihash, taskhash, gc_mark)
|
||||
SELECT id, method, unihash, taskhash, '' FROM unihashes_v2
|
||||
"""
|
||||
)
|
||||
cursor.execute("DROP TABLE unihashes_v2")
|
||||
db.commit()
|
||||
self.logger.info("Upgrade complete")
|
||||
|
||||
def connect(self, logger):
    """Create and return a ``Database`` for this engine.

    The new object is constructed from ``logger`` plus this engine's
    ``dbname`` and ``sync`` attributes.
    """
    database = Database(logger, self.dbname, self.sync)
    return database
|
||||
@@ -126,16 +186,7 @@ class Database(object):
|
||||
"PRAGMA synchronous = %s" % ("NORMAL" if sync else "OFF")
|
||||
)
|
||||
|
||||
cursor.execute("SELECT sqlite_version()")
|
||||
|
||||
version = []
|
||||
for v in cursor.fetchone()[0].split("."):
|
||||
try:
|
||||
version.append(int(v))
|
||||
except ValueError:
|
||||
version.append(v)
|
||||
|
||||
self.sqlite_version = tuple(version)
|
||||
self.sqlite_version = _get_sqlite_version(cursor)
|
||||
|
||||
async def __aenter__(self):
|
||||
return self
|
||||
@@ -143,6 +194,30 @@ class Database(object):
|
||||
async def __aexit__(self, exc_type, exc_value, traceback):
|
||||
await self.close()
|
||||
|
||||
async def _set_config(self, cursor, name, value):
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT OR REPLACE INTO config (id, name, value) VALUES
|
||||
((SELECT id FROM config WHERE name=:name), :name, :value)
|
||||
""",
|
||||
{
|
||||
"name": name,
|
||||
"value": value,
|
||||
},
|
||||
)
|
||||
|
||||
async def _get_config(self, cursor, name):
|
||||
cursor.execute(
|
||||
"SELECT value FROM config WHERE name=:name",
|
||||
{
|
||||
"name": name,
|
||||
},
|
||||
)
|
||||
row = cursor.fetchone()
|
||||
if row is None:
|
||||
return None
|
||||
return row["value"]
|
||||
|
||||
async def close(self):
|
||||
self.db.close()
|
||||
|
||||
@@ -150,8 +225,8 @@ class Database(object):
|
||||
with closing(self.db.cursor()) as cursor:
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT *, unihashes_v2.unihash AS unihash FROM outhashes_v2
|
||||
INNER JOIN unihashes_v2 ON unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash
|
||||
SELECT *, unihashes_v3.unihash AS unihash FROM outhashes_v2
|
||||
INNER JOIN unihashes_v3 ON unihashes_v3.method=outhashes_v2.method AND unihashes_v3.taskhash=outhashes_v2.taskhash
|
||||
WHERE outhashes_v2.method=:method AND outhashes_v2.taskhash=:taskhash
|
||||
ORDER BY outhashes_v2.created ASC
|
||||
LIMIT 1
|
||||
@@ -167,8 +242,8 @@ class Database(object):
|
||||
with closing(self.db.cursor()) as cursor:
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT *, unihashes_v2.unihash AS unihash FROM outhashes_v2
|
||||
INNER JOIN unihashes_v2 ON unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash
|
||||
SELECT *, unihashes_v3.unihash AS unihash FROM outhashes_v2
|
||||
INNER JOIN unihashes_v3 ON unihashes_v3.method=outhashes_v2.method AND unihashes_v3.taskhash=outhashes_v2.taskhash
|
||||
WHERE outhashes_v2.method=:method AND outhashes_v2.outhash=:outhash
|
||||
ORDER BY outhashes_v2.created ASC
|
||||
LIMIT 1
|
||||
@@ -200,8 +275,8 @@ class Database(object):
|
||||
with closing(self.db.cursor()) as cursor:
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT outhashes_v2.taskhash AS taskhash, unihashes_v2.unihash AS unihash FROM outhashes_v2
|
||||
INNER JOIN unihashes_v2 ON unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash
|
||||
SELECT outhashes_v2.taskhash AS taskhash, unihashes_v3.unihash AS unihash FROM outhashes_v2
|
||||
INNER JOIN unihashes_v3 ON unihashes_v3.method=outhashes_v2.method AND unihashes_v3.taskhash=outhashes_v2.taskhash
|
||||
-- Select any matching output hash except the one we just inserted
|
||||
WHERE outhashes_v2.method=:method AND outhashes_v2.outhash=:outhash AND outhashes_v2.taskhash!=:taskhash
|
||||
-- Pick the oldest hash
|
||||
@@ -219,7 +294,7 @@ class Database(object):
|
||||
async def get_equivalent(self, method, taskhash):
|
||||
with closing(self.db.cursor()) as cursor:
|
||||
cursor.execute(
|
||||
"SELECT taskhash, method, unihash FROM unihashes_v2 WHERE method=:method AND taskhash=:taskhash",
|
||||
"SELECT taskhash, method, unihash FROM unihashes_v3 WHERE method=:method AND taskhash=:taskhash",
|
||||
{
|
||||
"method": method,
|
||||
"taskhash": taskhash,
|
||||
@@ -229,15 +304,9 @@ class Database(object):
|
||||
|
||||
async def remove(self, condition):
|
||||
def do_remove(columns, table_name, cursor):
|
||||
where = {}
|
||||
for c in columns:
|
||||
if c in condition and condition[c] is not None:
|
||||
where[c] = condition[c]
|
||||
|
||||
where, clause = _make_condition_statement(columns, condition)
|
||||
if where:
|
||||
query = ("DELETE FROM %s WHERE " % table_name) + " AND ".join(
|
||||
"%s=:%s" % (k, k) for k in where.keys()
|
||||
)
|
||||
query = f"DELETE FROM {table_name} WHERE {clause}"
|
||||
cursor.execute(query, where)
|
||||
return cursor.rowcount
|
||||
|
||||
@@ -246,17 +315,80 @@ class Database(object):
|
||||
count = 0
|
||||
with closing(self.db.cursor()) as cursor:
|
||||
count += do_remove(OUTHASH_TABLE_COLUMNS, "outhashes_v2", cursor)
|
||||
count += do_remove(UNIHASH_TABLE_COLUMNS, "unihashes_v2", cursor)
|
||||
count += do_remove(UNIHASH_TABLE_COLUMNS, "unihashes_v3", cursor)
|
||||
self.db.commit()
|
||||
|
||||
return count
|
||||
|
||||
async def get_current_gc_mark(self):
|
||||
with closing(self.db.cursor()) as cursor:
|
||||
return await self._get_config(cursor, "gc-mark")
|
||||
|
||||
async def gc_status(self):
|
||||
with closing(self.db.cursor()) as cursor:
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT COUNT() FROM unihashes_v3 WHERE
|
||||
gc_mark=COALESCE((SELECT value FROM config WHERE name='gc-mark'), '')
|
||||
"""
|
||||
)
|
||||
keep_rows = cursor.fetchone()[0]
|
||||
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT COUNT() FROM unihashes_v3 WHERE
|
||||
gc_mark!=COALESCE((SELECT value FROM config WHERE name='gc-mark'), '')
|
||||
"""
|
||||
)
|
||||
remove_rows = cursor.fetchone()[0]
|
||||
|
||||
current_mark = await self._get_config(cursor, "gc-mark")
|
||||
|
||||
return (keep_rows, remove_rows, current_mark)
|
||||
|
||||
async def gc_mark(self, mark, condition):
    """Set the current garbage-collection mark and apply it to matching rows.

    ``mark`` is stored as the "gc-mark" config value. ``condition`` is
    filtered through ``_make_condition_statement()`` against the unihash
    columns; when it selects at least one column, every ``unihashes_v3`` row
    matching it has its ``gc_mark`` set to the stored mark ('' if that is
    NULL). When it selects nothing, no rows are updated.

    The changes are committed, and the number of rows updated is returned
    (0 when no update was run).
    """
    with closing(self.db.cursor()) as cursor:
        await self._set_config(cursor, "gc-mark", mark)

        where, clause = _make_condition_statement(UNIHASH_TABLE_COLUMNS, condition)

        marked = 0
        if where:
            statement = f"""
                UPDATE unihashes_v3 SET
                gc_mark=COALESCE((SELECT value FROM config WHERE name='gc-mark'), '')
                WHERE {clause}
                """
            cursor.execute(statement, where)
            marked = cursor.rowcount

        self.db.commit()
        return marked
|
||||
|
||||
async def gc_sweep(self):
|
||||
with closing(self.db.cursor()) as cursor:
|
||||
# NOTE: COALESCE is not used in this query so that if the current
|
||||
# mark is NULL, nothing will happen
|
||||
cursor.execute(
|
||||
"""
|
||||
DELETE FROM unihashes_v3 WHERE
|
||||
gc_mark!=(SELECT value FROM config WHERE name='gc-mark')
|
||||
"""
|
||||
)
|
||||
count = cursor.rowcount
|
||||
await self._set_config(cursor, "gc-mark", None)
|
||||
|
||||
self.db.commit()
|
||||
return count
|
||||
|
||||
async def clean_unused(self, oldest):
|
||||
with closing(self.db.cursor()) as cursor:
|
||||
cursor.execute(
|
||||
"""
|
||||
DELETE FROM outhashes_v2 WHERE created<:oldest AND NOT EXISTS (
|
||||
SELECT unihashes_v2.id FROM unihashes_v2 WHERE unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash LIMIT 1
|
||||
SELECT unihashes_v3.id FROM unihashes_v3 WHERE unihashes_v3.method=outhashes_v2.method AND unihashes_v3.taskhash=outhashes_v2.taskhash LIMIT 1
|
||||
)
|
||||
""",
|
||||
{
|
||||
@@ -271,7 +403,13 @@ class Database(object):
|
||||
prevrowid = cursor.lastrowid
|
||||
cursor.execute(
|
||||
"""
|
||||
INSERT OR IGNORE INTO unihashes_v2 (method, taskhash, unihash) VALUES(:method, :taskhash, :unihash)
|
||||
INSERT OR IGNORE INTO unihashes_v3 (method, taskhash, unihash, gc_mark) VALUES
|
||||
(
|
||||
:method,
|
||||
:taskhash,
|
||||
:unihash,
|
||||
COALESCE((SELECT value FROM config WHERE name='gc-mark'), '')
|
||||
)
|
||||
""",
|
||||
{
|
||||
"method": method,
|
||||
@@ -383,14 +521,9 @@ class Database(object):
|
||||
async def get_usage(self):
|
||||
usage = {}
|
||||
with closing(self.db.cursor()) as cursor:
|
||||
if self.sqlite_version >= (3, 33):
|
||||
table_name = "sqlite_schema"
|
||||
else:
|
||||
table_name = "sqlite_master"
|
||||
|
||||
cursor.execute(
|
||||
f"""
|
||||
SELECT name FROM {table_name} WHERE type = 'table' AND name NOT LIKE 'sqlite_%'
|
||||
SELECT name FROM {_schema_table_name(self.sqlite_version)} WHERE type = 'table' AND name NOT LIKE 'sqlite_%'
|
||||
"""
|
||||
)
|
||||
for row in cursor.fetchall():
|
||||
|
||||
Reference in New Issue
Block a user