mirror of
https://github.com/bitcoin/bitcoin.git
synced 2026-02-08 18:49:28 +08:00
Merge bitcoin/bitcoin#32621: contrib: utxo_to_sqlite.py: add option to store txid/spk as BLOBs
7378f27b4f test: run utxo-to-sqlite script test with spk/txid format option combinations (Sebastian Falbesoner) b30fca7498 contrib: utxo_to_sqlite.py: add options to store txid/spk as BLOBs (Sebastian Falbesoner) Pull request description: This PR is a late follow-up to https://github.com/bitcoin/bitcoin/pull/27432, introducing an option for the utxo-to-sqlite script to store the txid/scriptPubKey columns as bytes (= `BLOB` storage class in SQLite, see e.g. https://www.sqlite.org/datatype3.html) rather than hex strings. This was proposed in earlier reviews (https://github.com/bitcoin/bitcoin/pull/27432#issuecomment-1516857024, https://github.com/bitcoin/bitcoin/pull/27432#issuecomment-1653739351) and has the obvious advantage of a significantly smaller size of the resulting database (and with that, faster conversion) and the avoidance of hex-to-bytes conversion for further processing of the data [1]. The rationale for why hex strings were chosen back then (and still remain the default, if only for compatibility reasons) is laid out in https://github.com/bitcoin/bitcoin/pull/27432#issuecomment-1516922824 [2]. The approach taken is introducing new parameters `--spk` and `--txid`, which can have the values "hex" or "raw" (for scriptpubkey) and "hex", "raw", or "rawle" (for txid). Thanks to ajtowns for providing this suggestion. Happy to take further input on naming and thoughts on future extensibility etc. [1] For a concrete example, I found that having these columns as bytes would be nice while working on a SwiftSync hints generator tool (https://github.com/theStack/swiftsync-hints-gen), which takes the result of the utxo-to-sqlite tool as input. [2] Note that in contrast to what I wrote back then, I think there is no ambiguity on byte-string-serialization of txids; they are ultimately just hash results and hence, they should be stored as such, and adding a big/little endian knob wouldn't make much sense. 
The drawback of not being able to immediately show txid-strings (as one would need to do the bytes-reversal step first, which is not possible in sqlite, see e.g. https://github.com/bitcoin/bitcoin/pull/24952#issuecomment-1165499803) still remains though. ACKs for top commit: ajtowns: ACK 7378f27b4f w0xlt: reACK 7378f27b4f sedited: ACK 7378f27b4f Tree-SHA512: 265991a1f00e3d69e06dd9adc34684720affd416042789db2d76226e4b31cf20adc433a74d14140f17739707dee57e6703f72c20bd0f8dd08b6d383d3f28b450
This commit is contained in:
@@ -9,6 +9,9 @@ $ bitcoin-cli dumptxoutset ~/utxos.dat latest
|
||||
|
||||
The created database contains a table `utxos` with the following schema:
|
||||
(txid TEXT, vout INT, value INT, coinbase INT, height INT, scriptpubkey TEXT)
|
||||
|
||||
If --txid=raw or --txid=rawle is specified, txid will be BLOB instead;
|
||||
if --spk=raw, then scriptpubkey will be BLOB instead.
|
||||
"""
|
||||
import argparse
|
||||
import os
|
||||
@@ -111,7 +114,9 @@ def main():
|
||||
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
|
||||
parser.add_argument('infile', help='filename of compact-serialized UTXO set (input)')
|
||||
parser.add_argument('outfile', help='filename of created SQLite3 database (output)')
|
||||
parser.add_argument('-v', '--verbose', action='store_true', help='show details about each UTXO')
|
||||
parser.add_argument('--verbose', action='store_true', help='show details about each UTXO')
|
||||
parser.add_argument('--spk', choices=['hex', 'raw'], default='hex', help='encode scriptPubKey as hex or raw bytes')
|
||||
parser.add_argument('--txid', choices=['hex', 'raw', 'rawle'], default='hex', help='encode txid as hex, raw bytes (sha256 byteorder), or reversed raw bytes (little endian)')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not os.path.exists(args.infile):
|
||||
@@ -122,9 +127,15 @@ def main():
|
||||
print(f"Error: provided output file '{args.outfile}' already exists.")
|
||||
sys.exit(1)
|
||||
|
||||
spk_hex = (args.spk == 'hex')
|
||||
txid_hex = (args.txid == 'hex')
|
||||
txid_reverse = (args.txid != 'raw')
|
||||
|
||||
# create database table
|
||||
txid_fmt = "TEXT" if txid_hex else "BLOB"
|
||||
spk_fmt = "TEXT" if spk_hex else "BLOB"
|
||||
con = sqlite3.connect(args.outfile)
|
||||
con.execute("CREATE TABLE utxos(txid TEXT, vout INT, value INT, coinbase INT, height INT, scriptpubkey TEXT)")
|
||||
con.execute(f"CREATE TABLE utxos(txid {txid_fmt}, vout INT, value INT, coinbase INT, height INT, scriptpubkey {spk_fmt})")
|
||||
|
||||
# read metadata (magic bytes, version, network magic, block hash, UTXO count)
|
||||
f = open(args.infile, 'rb')
|
||||
@@ -153,7 +164,7 @@ def main():
|
||||
for coin_idx in range(1, num_utxos+1):
|
||||
# read key (COutPoint)
|
||||
if coins_per_hash_left == 0: # read next prevout hash
|
||||
prevout_hash = f.read(32)[::-1].hex()
|
||||
prevout_hash = f.read(32)
|
||||
coins_per_hash_left = read_compactsize(f)
|
||||
prevout_index = read_compactsize(f)
|
||||
# read value (Coin)
|
||||
@@ -161,17 +172,21 @@ def main():
|
||||
height = code >> 1
|
||||
is_coinbase = code & 1
|
||||
amount = decompress_amount(read_varint(f))
|
||||
scriptpubkey = decompress_script(f).hex()
|
||||
write_batch.append((prevout_hash, prevout_index, amount, is_coinbase, height, scriptpubkey))
|
||||
scriptpubkey = decompress_script(f)
|
||||
|
||||
scriptpubkey_write = scriptpubkey.hex() if spk_hex else scriptpubkey
|
||||
txid_write = prevout_hash[::-1] if txid_reverse else prevout_hash
|
||||
txid_write = txid_write.hex() if txid_hex else txid_write
|
||||
write_batch.append((txid_write, prevout_index, amount, is_coinbase, height, scriptpubkey_write))
|
||||
if height > max_height:
|
||||
max_height = height
|
||||
coins_per_hash_left -= 1
|
||||
|
||||
if args.verbose:
|
||||
print(f"Coin {coin_idx}/{num_utxos}:")
|
||||
print(f" prevout = {prevout_hash}:{prevout_index}")
|
||||
print(f" prevout = {prevout_hash[::-1].hex()}:{prevout_index}")
|
||||
print(f" amount = {amount}, height = {height}, coinbase = {is_coinbase}")
|
||||
print(f" scriptPubKey = {scriptpubkey}\n")
|
||||
print(f" scriptPubKey = {scriptpubkey.hex()}\n")
|
||||
|
||||
if coin_idx % (16*1024) == 0 or coin_idx == num_utxos:
|
||||
# write utxo batch to database
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
# Distributed under the MIT software license, see the accompanying
|
||||
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
"""Test utxo-to-sqlite conversion tool"""
|
||||
from itertools import product
|
||||
import os.path
|
||||
try:
|
||||
import sqlite3
|
||||
@@ -15,6 +16,7 @@ from test_framework.key import ECKey
|
||||
from test_framework.messages import (
|
||||
COutPoint,
|
||||
CTxOut,
|
||||
uint256_from_str,
|
||||
)
|
||||
from test_framework.crypto.muhash import MuHash3072
|
||||
from test_framework.script import (
|
||||
@@ -38,15 +40,33 @@ from test_framework.util import (
|
||||
from test_framework.wallet import MiniWallet
|
||||
|
||||
|
||||
def calculate_muhash_from_sqlite_utxos(filename):
|
||||
def calculate_muhash_from_sqlite_utxos(filename, txid_format, spk_format):
|
||||
muhash = MuHash3072()
|
||||
con = sqlite3.connect(filename)
|
||||
cur = con.cursor()
|
||||
for (txid_hex, vout, value, coinbase, height, spk_hex) in cur.execute("SELECT * FROM utxos"):
|
||||
for (txid, vout, value, coinbase, height, spk) in cur.execute("SELECT * FROM utxos"):
|
||||
match txid_format:
|
||||
case "hex":
|
||||
assert type(txid) is str
|
||||
txid_bytes = bytes.fromhex(txid)[::-1]
|
||||
case "raw":
|
||||
assert type(txid) is bytes
|
||||
txid_bytes = txid
|
||||
case "rawle":
|
||||
assert type(txid) is bytes
|
||||
txid_bytes = txid[::-1]
|
||||
match spk_format:
|
||||
case "hex":
|
||||
assert type(spk) is str
|
||||
spk_bytes = bytes.fromhex(spk)
|
||||
case "raw":
|
||||
assert type(spk) is bytes
|
||||
spk_bytes = spk
|
||||
|
||||
# serialize UTXO for MuHash (see function `TxOutSer` in the coinstats module)
|
||||
utxo_ser = COutPoint(int(txid_hex, 16), vout).serialize()
|
||||
utxo_ser = COutPoint(uint256_from_str(txid_bytes), vout).serialize()
|
||||
utxo_ser += (height * 2 + coinbase).to_bytes(4, 'little')
|
||||
utxo_ser += CTxOut(value, bytes.fromhex(spk_hex)).serialize()
|
||||
utxo_ser += CTxOut(value, spk_bytes).serialize()
|
||||
muhash.insert(utxo_ser)
|
||||
con.close()
|
||||
return muhash.digest()[::-1].hex()
|
||||
@@ -100,17 +120,20 @@ class UtxoToSqliteTest(BitcoinTestFramework):
|
||||
input_filename = os.path.join(self.options.tmpdir, "utxos.dat")
|
||||
node.dumptxoutset(input_filename, "latest")
|
||||
|
||||
self.log.info('Convert UTXO set from compact-serialized format to sqlite format')
|
||||
output_filename = os.path.join(self.options.tmpdir, "utxos.sqlite")
|
||||
base_dir = self.config["environment"]["SRCDIR"]
|
||||
utxo_to_sqlite_path = os.path.join(base_dir, "contrib", "utxo-tools", "utxo_to_sqlite.py")
|
||||
subprocess.run([sys.executable, utxo_to_sqlite_path, input_filename, output_filename],
|
||||
check=True, stderr=subprocess.STDOUT)
|
||||
for i, (txid_format, spk_format) in enumerate(product(["hex", "raw", "rawle"], ["hex", "raw"])):
|
||||
self.log.info(f'Test utxo-to-sqlite script using txid format "{txid_format}" and spk format "{spk_format}" ({i+1})')
|
||||
self.log.info('-> Convert UTXO set from compact-serialized format to sqlite format')
|
||||
output_filename = os.path.join(self.options.tmpdir, f"utxos_{i+1}.sqlite")
|
||||
base_dir = self.config["environment"]["SRCDIR"]
|
||||
utxo_to_sqlite_path = os.path.join(base_dir, "contrib", "utxo-tools", "utxo_to_sqlite.py")
|
||||
arguments = [input_filename, output_filename, f'--txid={txid_format}', f'--spk={spk_format}']
|
||||
subprocess.run([sys.executable, utxo_to_sqlite_path] + arguments, check=True, stderr=subprocess.STDOUT)
|
||||
|
||||
self.log.info('Verify that both UTXO sets match by comparing their MuHash')
|
||||
muhash_sqlite = calculate_muhash_from_sqlite_utxos(output_filename)
|
||||
muhash_compact_serialized = node.gettxoutsetinfo('muhash')['muhash']
|
||||
assert_equal(muhash_sqlite, muhash_compact_serialized)
|
||||
self.log.info('-> Verify that both UTXO sets match by comparing their MuHash')
|
||||
muhash_sqlite = calculate_muhash_from_sqlite_utxos(output_filename, txid_format, spk_format)
|
||||
muhash_compact_serialized = node.gettxoutsetinfo('muhash')['muhash']
|
||||
assert_equal(muhash_sqlite, muhash_compact_serialized)
|
||||
self.log.info('')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user