import argparse
import json
import multiprocessing
import pathlib
import queue
import time


def existing_file(path: str) -> pathlib.Path:
    # argparse type: accept only paths that exist and are regular files
    file_path = pathlib.Path(path)
    if not file_path.exists():
        raise argparse.ArgumentTypeError(f'{path!r} does not exist.')
    if not file_path.is_file():
        raise argparse.ArgumentTypeError(f'{path!r} is not a file.')
    return file_path


def get_cursor(args):
    # Open a cursor on the map database: a named (server-side) cursor for
    # postgres so rows stream in chunks, or a plain cursor for sqlite.
    if args.pg_connection:
        import psycopg2
        conn = psycopg2.connect(args.pg_connection)
        cursor = conn.cursor(name='blocks')
        cursor.itersize = args.chunk_size

    else:
        import sqlite3
        conn = sqlite3.connect(args.sqlite_file)
        cursor = conn.cursor()

    return cursor


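# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original file): the attribute names used
# above and below (pg_connection, sqlite_file, chunk_size, output,
# stairsplus_dump) suggest an argument parser along these lines; the flag
# names and defaults here are assumptions for illustration only.
# ---------------------------------------------------------------------------
def parse_args():
    parser = argparse.ArgumentParser(
        description='build a stairsplus whitelist from a minetest map database',
    )
    parser.add_argument('stairsplus_dump', type=existing_file,
                        help='JSON dump with "shaped_nodes" and "aliases"')
    parser.add_argument('--pg-connection',
                        help='psycopg2 connection string (takes precedence)')
    parser.add_argument('--sqlite-file', type=existing_file,
                        help='path to the map.sqlite database')
    parser.add_argument('--chunk-size', type=int, default=1024,
                        help='itersize for the server-side postgres cursor')
    parser.add_argument('--output', type=pathlib.Path,
                        help='whitelist destination (default: next to the dump)')
    return parser.parse_args()

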
def create_filter(stairsplus_dump: pathlib.Path):
    # Map every known shaped node name (and every alias) to the canonical
    # shaped node name, keyed and valued as bytes to match the raw block data.
    print('creating filter from dump...')
    start = time.time()
    f = {}
    with stairsplus_dump.open() as fh:
        data = json.load(fh)

    for shaped_node in data['shaped_nodes'].keys():
        f[shaped_node.encode()] = shaped_node.encode()

    for alias, shaped_node in data['aliases'].items():
        f[alias.encode()] = shaped_node.encode()

    print(f'created in {time.time() - start}s')

    return f


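# For reference, create_filter() above expects the dump JSON to be shaped
# roughly like this (structure inferred from the lookups; names illustrative):
#
#   {
#       "shaped_nodes": {"mod:slab_example": { ... }, ...},
#       "aliases": {"old:slab_example": "mod:slab_example", ...}
#   }

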
def count_blocks(args):
    cursor = get_cursor(args)

    # just shy of 12 minutes for postgres w/ a 150GiB map dump, an opteron 6376, and 4 encrypted raid6 5400 RPM disks
    print('counting mapblocks - this can take a while...')
    start = time.time()
    cursor.execute('SELECT COUNT(data) FROM blocks')
    num_blocks = cursor.fetchone()[0]
    elapsed = time.time() - start
    print(f'num_blocks: {num_blocks} (fetched in {elapsed}s)')
    return num_blocks, elapsed


def create_whitelist(filter_, all_nodes):
    # Keep only the nodes that are known shaped nodes (or aliases of them),
    # mapped to their canonical names via the filter.
    print('creating whitelist')
    return set(
        shaped_node for shaped_node in map(filter_.get, all_nodes) if shaped_node
    )


def write_whitelist(args, whitelist):
    if args.output:
        output = args.output
    else:
        output = args.stairsplus_dump.parent / 'stairsplus.whitelist'

    with output.open('wb') as fh:
        print(f'writing whitelist to {output!r}')
        fh.write(b'\n'.join(sorted(whitelist)))


def get_all_nodes(results: multiprocessing.Queue):
    # Drain whatever the workers have pushed onto the results queue into a
    # single set of node names; stop once the queue is empty.
    all_nodes = set()
    try:
        while True:
            all_nodes.update(results.get(False))
    except queue.Empty:
        return all_nodes
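

# ---------------------------------------------------------------------------
# Hedged sketch of how these helpers presumably fit together (the worker side
# that scans mapblocks and extracts node names is not part of this excerpt):
#
#     args = parse_args()
#     filter_ = create_filter(args.stairsplus_dump)
#     num_blocks, _ = count_blocks(args)
#     results = multiprocessing.Queue()
#     # ... worker processes put sets of node names found in each mapblock
#     # onto `results` ...
#     whitelist = create_whitelist(filter_, get_all_nodes(results))
#     write_whitelist(args, whitelist)
# ---------------------------------------------------------------------------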