Dash Core  0.12.2.1
P2P Digital Currency
linearize-data.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 #
3 # linearize-data.py: Construct a linear, no-fork version of the chain.
4 #
5 # Copyright (c) 2013-2014 The Bitcoin Core developers
6 # Distributed under the MIT software license, see the accompanying
7 # file COPYING or http://www.opensource.org/licenses/mit-license.php.
8 #
9 
from __future__ import print_function, division

import base64
import binascii
import datetime
import hashlib
import httplib
import json
import os
import os.path
import re
import struct
import sys
import time
from collections import namedtuple

import dash_hash

25 settings = {}
26 
def uint32(x):
    """Truncate *x* to an unsigned 32-bit value.

    The original used the Python 2-only long literal ``0xffffffffL``, which is
    a syntax error under Python 3. The plain literal is value-identical under
    Python 2 (small ints promote to long automatically) and portable.
    """
    return x & 0xffffffff
def bytereverse(x):
    """Return the 32-bit value *x* with its byte order swapped.

    The 32-bit mask (previously delegated to the module-level ``uint32``
    helper, whose Python 2-only long literal breaks Python 3 parsing) is
    inlined here, removing the cross-function dependency; the result is
    bit-identical.
    """
    swapped = ((x << 24) |
               ((x << 8) & 0x00ff0000) |
               ((x >> 8) & 0x0000ff00) |
               (x >> 24))
    return swapped & 0xffffffff
def bufreverse(in_buf):
    """Return *in_buf* with the bytes of every 4-byte word reversed.

    The original unpacked each word with native byte order, integer-swapped it
    with ``bytereverse``, and repacked it — the net effect is reversing each
    word's bytes regardless of host endianness. A direct slice reversal is
    equivalent, simpler, and also works on Python 3 ``bytes`` (joining
    ``struct.pack`` output with a text-string separator is Python 2-only).
    """
    out_words = []
    for i in range(0, len(in_buf), 4):
        out_words.append(in_buf[i:i+4][::-1])
    # b'' is identical to '' under Python 2 (str is bytes there).
    return b''.join(out_words)
40 
def wordreverse(in_buf):
    """Return *in_buf* with its 4-byte words in reverse order.

    Bytes within each word are untouched; together with ``bufreverse`` this
    converts a raw hash digest to/from display byte order. ``b''.join`` keeps
    the function working on Python 3 ``bytes`` while being byte-identical
    under Python 2 (where str and bytes are the same type).
    """
    out_words = []
    for i in range(0, len(in_buf), 4):
        out_words.append(in_buf[i:i+4])
    out_words.reverse()
    return b''.join(out_words)
47 
def calc_hdr_hash(blk_hdr):
    """Return the proof-of-work hash of an 80-byte block header.

    Dash uses the PoW hash provided by the ``dash_hash`` extension module
    (presumably X11 — not Bitcoin's double SHA-256). The old SHA-256d
    implementation that was left commented out here has been removed as
    dead code.
    """
    return dash_hash.getPoWHash(blk_hdr)
60 
def calc_hash_str(blk_hdr):
    """Return the display-order hex string of a block header's PoW hash.

    The byte/word reversals convert the raw digest into the conventional
    (RPC-style) display byte order used by the hashlist file.
    """
    pow_hash = calc_hdr_hash(blk_hdr)
    pow_hash = bufreverse(pow_hash)
    pow_hash = wordreverse(pow_hash)
    # binascii.hexlify is equivalent to the Python 2-only str.encode('hex'),
    # and the local is renamed so it no longer shadows the builtin 'hash'.
    return binascii.hexlify(pow_hash)
67 
def get_blk_dt(blk_hdr):
    """Extract the block timestamp from an 80-byte block header.

    The little-endian uint32 nTime field sits at byte offset 68. Returns a
    tuple ``(month_start, nTime)`` where month_start is a datetime truncated
    to the first day of the block's (local-time) month.
    """
    (nTime,) = struct.unpack_from("<I", blk_hdr, 68)
    when = datetime.datetime.fromtimestamp(nTime)
    month_start = datetime.datetime(when.year, when.month, 1)
    return (month_start, nTime)
74 
def get_block_hashes(settings):
    """Read the ordered list of block hashes from settings['hashlist'].

    Each line of the file is one hex hash string; the line's position defines
    the block height. Returns the list of hash strings.
    """
    blkindex = []
    # 'with' guarantees the file is closed (the original leaked the handle).
    with open(settings['hashlist'], "r") as f:
        for line in f:
            blkindex.append(line.rstrip())

    print("Read " + str(len(blkindex)) + " hashes")

    return blkindex
85 
def mkblockmap(blkindex):
    """Map each block hash string to its height (its index in blkindex)."""
    return {blkhash: height for height, blkhash in enumerate(blkindex)}
91 
# Block header and extent on disk.
# fn:     input blk*.dat file number the block was found in
# offset: byte offset of the block payload (just past the 80-byte header)
# inhdr:  the 8-byte magic+length record preceding the block
# blkhdr: the 80-byte block header
# size:   payload size in bytes (record length minus the 80-byte header)
BlockExtent = namedtuple('BlockExtent', ['fn', 'offset', 'inhdr', 'blkhdr', 'size'])
94 
    def __init__(self, settings, blkindex, blkmap):
        """Set up copier state.

        settings: parsed key=value config dict (see __main__ for keys/defaults)
        blkindex: ordered list of block hash strings (height == list index)
        blkmap:   dict mapping hash string -> height
        """
        self.settings = settings
        self.blkindex = blkindex
        self.blkmap = blkmap

        # Input/output cursors and counters.
        self.inFn = 0          # current input blk*.dat file number
        self.inF = None        # open input file handle (None until opened)
        self.outFn = 0         # current output file number (directory mode)
        self.outsz = 0         # bytes written to the current output file
        self.outF = None       # open output file handle
        self.outFname = None   # path of the current output file
        self.blkCountIn = 0    # blocks read from input
        self.blkCountOut = 0   # blocks written to output

        self.lastDate = datetime.datetime(2000, 1, 1)  # month marker for split_timestamp mode
        self.highTS = 1408893517 - 315360000           # highest block timestamp seen; seeded to a fixed epoch minus ten years of seconds
        self.timestampSplit = False   # start a new output file each calendar month
        self.fileOutput = True        # single-file output unless an 'output' dir is configured
        self.setFileTime = False      # stamp output file mtime with the highest block timestamp
        self.maxOutSz = settings['max_out_sz']
        if 'output' in settings:
            self.fileOutput = False
        if settings['file_timestamp'] != 0:
            self.setFileTime = True
        if settings['split_timestamp'] != 0:
            self.timestampSplit = True
        # Extents and cache for out-of-order blocks
        self.blockExtents = {}        # height -> BlockExtent for blocks seen ahead of order
        self.outOfOrderData = {}      # height -> raw block bytes cached in memory
        self.outOfOrderSize = 0 # running total size for items in outOfOrderData
126 
127  def writeBlock(self, inhdr, blk_hdr, rawblock):
128  blockSizeOnDisk = len(inhdr) + len(blk_hdr) + len(rawblock)
129  if not self.fileOutput and ((self.outsz + blockSizeOnDisk) > self.maxOutSz):
130  self.outF.close()
131  if self.setFileTime:
132  os.utime(outFname, (int(time.time()), highTS))
133  self.outF = None
134  self.outFname = None
135  self.outFn = self.outFn + 1
136  self.outsz = 0
137 
138  (blkDate, blkTS) = get_blk_dt(blk_hdr)
139  if self.timestampSplit and (blkDate > self.lastDate):
140  print("New month " + blkDate.strftime("%Y-%m") + " @ " + hash_str)
141  lastDate = blkDate
142  if outF:
143  outF.close()
144  if setFileTime:
145  os.utime(outFname, (int(time.time()), highTS))
146  self.outF = None
147  self.outFname = None
148  self.outFn = self.outFn + 1
149  self.outsz = 0
150 
151  if not self.outF:
152  if self.fileOutput:
153  outFname = self.settings['output_file']
154  else:
155  outFname = os.path.join(self.settings['output'], "blk%05d.dat" % self.outFn)
156  print("Output file " + outFname)
157  self.outF = open(outFname, "wb")
158 
159  self.outF.write(inhdr)
160  self.outF.write(blk_hdr)
161  self.outF.write(rawblock)
162  self.outsz = self.outsz + len(inhdr) + len(blk_hdr) + len(rawblock)
163 
164  self.blkCountOut = self.blkCountOut + 1
165  if blkTS > self.highTS:
166  self.highTS = blkTS
167 
168  if (self.blkCountOut % 1000) == 0:
169  print('%i blocks scanned, %i blocks written (of %i, %.1f%% complete)' %
170  (self.blkCountIn, self.blkCountOut, len(self.blkindex), 100.0 * self.blkCountOut / len(self.blkindex)))
171 
172  def inFileName(self, fn):
173  return os.path.join(self.settings['input'], "blk%05d.dat" % fn)
174 
175  def fetchBlock(self, extent):
176  '''Fetch block contents from disk given extents'''
177  with open(self.inFileName(extent.fn), "rb") as f:
178  f.seek(extent.offset)
179  return f.read(extent.size)
180 
181  def copyOneBlock(self):
182  '''Find the next block to be written in the input, and copy it to the output.'''
183  extent = self.blockExtents.pop(self.blkCountOut)
184  if self.blkCountOut in self.outOfOrderData:
185  # If the data is cached, use it from memory and remove from the cache
186  rawblock = self.outOfOrderData.pop(self.blkCountOut)
187  self.outOfOrderSize -= len(rawblock)
188  else: # Otherwise look up data on disk
189  rawblock = self.fetchBlock(extent)
190 
191  self.writeBlock(extent.inhdr, extent.blkhdr, rawblock)
192 
193  def run(self):
194  while self.blkCountOut < len(self.blkindex):
195  if not self.inF:
196  fname = self.inFileName(self.inFn)
197  print("Input file " + fname)
198  try:
199  self.inF = open(fname, "rb")
200  except IOError:
201  print("Premature end of block data")
202  return
203 
204  inhdr = self.inF.read(8)
205  if (not inhdr or (inhdr[0] == "\0")):
206  self.inF.close()
207  self.inF = None
208  self.inFn = self.inFn + 1
209  continue
210 
211  inMagic = inhdr[:4]
212  if (inMagic != self.settings['netmagic']):
213  print("Invalid magic: " + inMagic.encode('hex'))
214  return
215  inLenLE = inhdr[4:]
216  su = struct.unpack("<I", inLenLE)
217  inLen = su[0] - 80 # length without header
218  blk_hdr = self.inF.read(80)
219  inExtent = BlockExtent(self.inFn, self.inF.tell(), inhdr, blk_hdr, inLen)
220 
221  hash_str = calc_hash_str(blk_hdr)
222  if not hash_str in blkmap:
223  print("Skipping unknown block " + hash_str)
224  self.inF.seek(inLen, os.SEEK_CUR)
225  continue
226 
227  blkHeight = self.blkmap[hash_str]
228  self.blkCountIn += 1
229 
230  if self.blkCountOut == blkHeight:
231  # If in-order block, just copy
232  rawblock = self.inF.read(inLen)
233  self.writeBlock(inhdr, blk_hdr, rawblock)
234 
235  # See if we can catch up to prior out-of-order blocks
236  while self.blkCountOut in self.blockExtents:
237  self.copyOneBlock()
238 
239  else: # If out-of-order, skip over block data for now
240  self.blockExtents[blkHeight] = inExtent
241  if self.outOfOrderSize < self.settings['out_of_order_cache_sz']:
242  # If there is space in the cache, read the data
243  # Reading the data in file sequence instead of seeking and fetching it later is preferred,
244  # but we don't want to fill up memory
245  self.outOfOrderData[blkHeight] = self.inF.read(inLen)
246  self.outOfOrderSize += inLen
247  else: # If no space in cache, seek forward
248  self.inF.seek(inLen, os.SEEK_CUR)
249 
250  print("Done (%i blocks written)" % (self.blkCountOut))
251 
if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Usage: linearize-data.py CONFIG-FILE")
        sys.exit(1)

    # Parse the key=value config file. 'with' closes the handle even if a
    # line raises; regex patterns are raw strings so '\s' / '\w' are not
    # invalid string escapes on newer Pythons.
    with open(sys.argv[1]) as f:
        for line in f:
            # skip comment lines
            m = re.search(r'^\s*#', line)
            if m:
                continue

            # parse key=value lines
            m = re.search(r'^(\w+)\s*=\s*(\S.*)$', line)
            if m is None:
                continue
            settings[m.group(1)] = m.group(2)

    # Defaults for anything the config file did not set.
    if 'netmagic' not in settings:
        settings['netmagic'] = 'cee2caff'
    if 'genesis' not in settings:
        settings['genesis'] = '00000bafbc94add76cb75e2ec92894837288a481e5c005f6563d91623bf8bc2c'
    if 'input' not in settings:
        settings['input'] = 'input'
    if 'hashlist' not in settings:
        settings['hashlist'] = 'hashlist.txt'
    if 'file_timestamp' not in settings:
        settings['file_timestamp'] = 0
    if 'split_timestamp' not in settings:
        settings['split_timestamp'] = 0
    if 'max_out_sz' not in settings:
        # Plain int literal; long(...) below promotes it anyway (the 1000L
        # long literal is Python 2-only syntax and value-identical).
        settings['max_out_sz'] = 1000 * 1000 * 1000
    if 'out_of_order_cache_sz' not in settings:
        settings['out_of_order_cache_sz'] = 100 * 1000 * 1000

    # Normalize types: sizes/flags to numbers, netmagic to raw bytes.
    settings['max_out_sz'] = long(settings['max_out_sz'])
    settings['split_timestamp'] = int(settings['split_timestamp'])
    settings['file_timestamp'] = int(settings['file_timestamp'])
    settings['netmagic'] = settings['netmagic'].decode('hex')
    settings['out_of_order_cache_sz'] = int(settings['out_of_order_cache_sz'])

    if 'output_file' not in settings and 'output' not in settings:
        print("Missing output file / directory")
        sys.exit(1)

    blkindex = get_block_hashes(settings)
    blkmap = mkblockmap(blkindex)

    if not settings['genesis'] in blkmap:
        print("Genesis block not found in hashlist")
    else:
        BlockDataCopier(settings, blkindex, blkmap).run()
305 
def mkblockmap(blkindex)
def calc_hdr_hash(blk_hdr)
def writeBlock(self, inhdr, blk_hdr, rawblock)
def bufreverse(in_buf)
def get_block_hashes(settings)
def __init__(self, settings, blkindex, blkmap)
def calc_hash_str(blk_hdr)
def wordreverse(in_buf)
def get_blk_dt(blk_hdr)