diff --git a/contrib/utxo-tools/utxo_to_sqlite.py b/contrib/utxo-tools/utxo_to_sqlite.py index 4758fe39aaa..56de73698c2 100755 --- a/contrib/utxo-tools/utxo_to_sqlite.py +++ b/contrib/utxo-tools/utxo_to_sqlite.py @@ -9,6 +9,9 @@ $ bitcoin-cli dumptxoutset ~/utxos.dat latest The created database contains a table `utxos` with the following schema: (txid TEXT, vout INT, value INT, coinbase INT, height INT, scriptpubkey TEXT) + +If --txid=raw or --txid=rawle is specified, txid will be BLOB instead; +if --spk=raw, then scriptpubkey will be BLOB instead. """ import argparse import os @@ -111,7 +114,9 @@ def main(): parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('infile', help='filename of compact-serialized UTXO set (input)') parser.add_argument('outfile', help='filename of created SQLite3 database (output)') - parser.add_argument('-v', '--verbose', action='store_true', help='show details about each UTXO') + parser.add_argument('--verbose', action='store_true', help='show details about each UTXO') + parser.add_argument('--spk', choices=['hex', 'raw'], default='hex', help='encode scriptPubKey as hex or raw bytes') + parser.add_argument('--txid', choices=['hex', 'raw', 'rawle'], default='hex', help='encode txid as hex, raw bytes (sha256 byteorder), or reversed raw bytes (little endian)') args = parser.parse_args() if not os.path.exists(args.infile): @@ -122,9 +127,15 @@ def main(): print(f"Error: provided output file '{args.outfile}' already exists.") sys.exit(1) + spk_hex = (args.spk == 'hex') + txid_hex = (args.txid == 'hex') + txid_reverse = (args.txid != 'raw') + # create database table + txid_fmt = "TEXT" if txid_hex else "BLOB" + spk_fmt = "TEXT" if spk_hex else "BLOB" con = sqlite3.connect(args.outfile) - con.execute("CREATE TABLE utxos(txid TEXT, vout INT, value INT, coinbase INT, height INT, scriptpubkey TEXT)") + con.execute(f"CREATE TABLE utxos(txid {txid_fmt}, vout INT, value INT, coinbase INT, height INT, scriptpubkey {spk_fmt})") # read metadata (magic bytes, version, network magic, block hash, UTXO count) f = open(args.infile, 'rb') @@ -153,7 +164,7 @@ def main(): for coin_idx in range(1, num_utxos+1): # read key (COutPoint) if coins_per_hash_left == 0: # read next prevout hash - prevout_hash = f.read(32)[::-1].hex() + prevout_hash = f.read(32) coins_per_hash_left = read_compactsize(f) prevout_index = read_compactsize(f) # read value (Coin) @@ -161,17 +172,21 @@ def main(): height = code >> 1 is_coinbase = code & 1 amount = decompress_amount(read_varint(f)) - scriptpubkey = decompress_script(f).hex() - write_batch.append((prevout_hash, prevout_index, amount, is_coinbase, height, scriptpubkey)) + scriptpubkey = decompress_script(f) + + scriptpubkey_write = scriptpubkey.hex() if spk_hex else scriptpubkey + txid_write = prevout_hash[::-1] if txid_reverse else prevout_hash + txid_write = txid_write.hex() if txid_hex else txid_write + write_batch.append((txid_write, prevout_index, amount, is_coinbase, height, scriptpubkey_write)) if height > max_height: max_height = height coins_per_hash_left -= 1 if args.verbose: print(f"Coin {coin_idx}/{num_utxos}:") - print(f" prevout = {prevout_hash}:{prevout_index}") + print(f" prevout = {prevout_hash[::-1].hex()}:{prevout_index}") print(f" amount = {amount}, height = {height}, coinbase = {is_coinbase}") - print(f" scriptPubKey = {scriptpubkey}\n") + print(f" scriptPubKey = {scriptpubkey.hex()}\n") if coin_idx % (16*1024) == 0 or coin_idx == num_utxos: # write utxo batch to database diff --git a/test/functional/tool_utxo_to_sqlite.py b/test/functional/tool_utxo_to_sqlite.py index 2da7c42a86b..d1f3e7e1934 100755 --- a/test/functional/tool_utxo_to_sqlite.py +++ b/test/functional/tool_utxo_to_sqlite.py @@ -3,6 +3,7 @@ # Distributed under the MIT software license, see the accompanying # file COPYING or http://www.opensource.org/licenses/mit-license.php. """Test utxo-to-sqlite conversion tool""" +from itertools import product import os.path try: import sqlite3 @@ -15,6 +16,7 @@ from test_framework.key import ECKey from test_framework.messages import ( COutPoint, CTxOut, + uint256_from_str, ) from test_framework.crypto.muhash import MuHash3072 from test_framework.script import ( @@ -38,15 +40,33 @@ from test_framework.util import ( from test_framework.wallet import MiniWallet -def calculate_muhash_from_sqlite_utxos(filename): +def calculate_muhash_from_sqlite_utxos(filename, txid_format, spk_format): muhash = MuHash3072() con = sqlite3.connect(filename) cur = con.cursor() - for (txid_hex, vout, value, coinbase, height, spk_hex) in cur.execute("SELECT * FROM utxos"): + for (txid, vout, value, coinbase, height, spk) in cur.execute("SELECT * FROM utxos"): + match txid_format: + case "hex": + assert type(txid) is str + txid_bytes = bytes.fromhex(txid)[::-1] + case "raw": + assert type(txid) is bytes + txid_bytes = txid + case "rawle": + assert type(txid) is bytes + txid_bytes = txid[::-1] + match spk_format: + case "hex": + assert type(spk) is str + spk_bytes = bytes.fromhex(spk) + case "raw": + assert type(spk) is bytes + spk_bytes = spk + # serialize UTXO for MuHash (see function `TxOutSer` in the coinstats module) - utxo_ser = COutPoint(int(txid_hex, 16), vout).serialize() + utxo_ser = COutPoint(uint256_from_str(txid_bytes), vout).serialize() utxo_ser += (height * 2 + coinbase).to_bytes(4, 'little') - utxo_ser += CTxOut(value, bytes.fromhex(spk_hex)).serialize() + utxo_ser += CTxOut(value, spk_bytes).serialize() muhash.insert(utxo_ser) con.close() return muhash.digest()[::-1].hex() @@ -100,17 +120,20 @@ class UtxoToSqliteTest(BitcoinTestFramework): input_filename = os.path.join(self.options.tmpdir, "utxos.dat") node.dumptxoutset(input_filename, "latest") - self.log.info('Convert UTXO set from compact-serialized format to sqlite format') - output_filename = os.path.join(self.options.tmpdir, "utxos.sqlite") - base_dir = self.config["environment"]["SRCDIR"] - utxo_to_sqlite_path = os.path.join(base_dir, "contrib", "utxo-tools", "utxo_to_sqlite.py") - subprocess.run([sys.executable, utxo_to_sqlite_path, input_filename, output_filename], - check=True, stderr=subprocess.STDOUT) + for i, (txid_format, spk_format) in enumerate(product(["hex", "raw", "rawle"], ["hex", "raw"])): + self.log.info(f'Test utxo-to-sqlite script using txid format "{txid_format}" and spk format "{spk_format}" ({i+1})') + self.log.info('-> Convert UTXO set from compact-serialized format to sqlite format') + output_filename = os.path.join(self.options.tmpdir, f"utxos_{i+1}.sqlite") + base_dir = self.config["environment"]["SRCDIR"] + utxo_to_sqlite_path = os.path.join(base_dir, "contrib", "utxo-tools", "utxo_to_sqlite.py") + arguments = [input_filename, output_filename, f'--txid={txid_format}', f'--spk={spk_format}'] + subprocess.run([sys.executable, utxo_to_sqlite_path] + arguments, check=True, stderr=subprocess.STDOUT) - self.log.info('Verify that both UTXO sets match by comparing their MuHash') - muhash_sqlite = calculate_muhash_from_sqlite_utxos(output_filename) - muhash_compact_serialized = node.gettxoutsetinfo('muhash')['muhash'] - assert_equal(muhash_sqlite, muhash_compact_serialized) + self.log.info('-> Verify that both UTXO sets match by comparing their MuHash') + muhash_sqlite = calculate_muhash_from_sqlite_utxos(output_filename, txid_format, spk_format) + muhash_compact_serialized = node.gettxoutsetinfo('muhash')['muhash'] + assert_equal(muhash_sqlite, muhash_compact_serialized) + self.log.info('') if __name__ == "__main__":