Talk:Package Files

From PS4 Developer wiki
Revision as of 17:31, 28 March 2020 by Roxanne (talk | contribs) (Roxanne moved page Talk:PKG files to Talk:PKG Files: wikify)
Jump to navigation Jump to search

PKG file

UnPKG tool

# UnPKG rev 0x00000008 (public edition), (c) flatz

import sys, os, hashlib, hmac, struct, math, traceback
from cStringIO import StringIO

# parse arguments

if len(sys.argv) < 3:
	script_file_name = os.path.split(sys.argv[0])[1]
	print 'usage: {0} <pkg file> <output dir>'.format(script_file_name)
	sys.exit()

pkg_file_path = sys.argv[1]
if not os.path.isfile(pkg_file_path):
	print 'error: invalid file specified'
	sys.exit()

output_dir = sys.argv[2]
if os.path.exists(output_dir) and not os.path.isdir(output_dir):
	print 'error: invalid directory specified'
	sys.exit()
elif not os.path.exists(output_dir):
	os.makedirs(output_dir)

# cryptography functions

def sha256(data):
	"""Return the raw (binary, not hex) SHA-256 digest of data."""
	hasher = hashlib.sha256()
	hasher.update(data)
	return hasher.digest()

# utility functions

# struct format strings for big-endian unsigned integers
uint64_fmt = '>Q'
uint32_fmt = '>I'
uint16_fmt = '>H'
uint8_fmt = '>B'
# struct format strings for big-endian signed integers
int64_fmt = '>q'
int32_fmt = '>i'
int16_fmt = '>h'
int8_fmt = '>b'

def read_string(f, length):
	"""Read up to length bytes from f and return them unchanged."""
	chunk = f.read(length)
	return chunk
def read_cstring(f):
	"""Read a NUL-terminated string from f, one character at a time.

	Returns the characters that precede the terminator (the terminator
	itself is consumed but not returned), or False when the end of the
	stream is reached before a terminator is seen.
	"""
	chars = []
	c = f.read(1)
	while c:
		if ord(c) == 0:
			return ''.join(chars)
		chars.append(c)
		c = f.read(1)
	# the stream ended without a terminating NUL
	return False

def _read_packed(f, fmt):
	"""Read the bytes described by struct format fmt from f and return the first unpacked value."""
	raw = f.read(struct.calcsize(fmt))
	return struct.unpack(fmt, raw)[0]

# Unsigned readers: '<' is little-endian, '>' is big-endian.
def read_uint8_le(f):
	"""Read an unsigned 8-bit integer from f."""
	return _read_packed(f, '<B')
def read_uint8_be(f):
	"""Read an unsigned 8-bit integer from f."""
	return _read_packed(f, '>B')
def read_uint16_le(f):
	"""Read a little-endian unsigned 16-bit integer from f."""
	return _read_packed(f, '<H')
def read_uint16_be(f):
	"""Read a big-endian unsigned 16-bit integer from f."""
	return _read_packed(f, '>H')
def read_uint32_le(f):
	"""Read a little-endian unsigned 32-bit integer from f."""
	return _read_packed(f, '<I')
def read_uint32_be(f):
	"""Read a big-endian unsigned 32-bit integer from f."""
	return _read_packed(f, '>I')
def read_uint64_le(f):
	"""Read a little-endian unsigned 64-bit integer from f."""
	return _read_packed(f, '<Q')
def read_uint64_be(f):
	"""Read a big-endian unsigned 64-bit integer from f."""
	return _read_packed(f, '>Q')

# Signed readers.
def read_int8_le(f):
	"""Read a signed 8-bit integer from f."""
	return _read_packed(f, '<b')
def read_int8_be(f):
	"""Read a signed 8-bit integer from f."""
	return _read_packed(f, '>b')
def read_int16_le(f):
	"""Read a little-endian signed 16-bit integer from f."""
	return _read_packed(f, '<h')
def read_int16_be(f):
	"""Read a big-endian signed 16-bit integer from f."""
	return _read_packed(f, '>h')
def read_int32_le(f):
	"""Read a little-endian signed 32-bit integer from f."""
	return _read_packed(f, '<i')
def read_int32_be(f):
	"""Read a big-endian signed 32-bit integer from f."""
	return _read_packed(f, '>i')
def read_int64_le(f):
	"""Read a little-endian signed 64-bit integer from f."""
	return _read_packed(f, '<q')
def read_int64_be(f):
	"""Read a big-endian signed 64-bit integer from f."""
	return _read_packed(f, '>q')

# main code

# Header magic and fixed sizes (in bytes) used while parsing
PKG_MAGIC = '\x7FCNT'
CONTENT_ID_SIZE = 0x24
SHA256_HASH_SIZE = 0x20
META_ENTRY_SIZE = 0x20

# Top bit of the 32-bit type field read right after the magic
FILE_TYPE_FLAGS_RETAIL = 0x80000000

# Full entry type values of the table entries
ENTRY_TYPE_DIGEST_TABLE = 0x0001
ENTRY_TYPE_0x800        = 0x0010
ENTRY_TYPE_0x200        = 0x0020
ENTRY_TYPE_0x180        = 0x0080
ENTRY_TYPE_META_TABLE   = 0x0100
ENTRY_TYPE_NAME_TABLE   = 0x0200

# Values compared against the high byte of an entry type
ENTRY_TYPE_LICENSE = 0x04
ENTRY_TYPE_FILE1   = 0x10
ENTRY_TYPE_FILE2   = 0x12

# Output names for the table entries
_TABLE_ENTRY_NAMES = {
	ENTRY_TYPE_DIGEST_TABLE: '.digests',
	ENTRY_TYPE_0x800: '.entry_0x800',
	ENTRY_TYPE_0x200: '.entry_0x200',
	ENTRY_TYPE_0x180: '.entry_0x180',
	ENTRY_TYPE_META_TABLE: '.meta',
	ENTRY_TYPE_NAME_TABLE: '.names',
}

# Output names for entry types with a fixed, known file name
_KNOWN_FILE_NAMES = {
	0x0400: 'license.dat',
	0x0401: 'license.info',
	0x1000: 'param.sfo',
	0x1001: 'playgo-chunk.dat',
	0x1002: 'playgo-chunk.sha',
	0x1003: 'playgo-manifest.xml',
	0x1004: 'pronunciation.xml',
	0x1005: 'pronunciation.sig',
	0x1006: 'pic1.png',
	0x1008: 'app/playgo-chunk.dat',
	0x1200: 'icon0.png',
	0x1220: 'pic0.png',
	0x1240: 'snd0.at9',
	0x1260: 'changeinfo/changeinfo.xml',
}

# Entry type -> output file name (relative to the output directory)
ENTRY_TABLE_MAP = dict(_TABLE_ENTRY_NAMES)
ENTRY_TABLE_MAP.update(_KNOWN_FILE_NAMES)

class MyError(Exception):
	"""Error raised by this script when the package cannot be processed.

	The human-readable description is available as the message attribute.
	"""

	def __init__(self, message):
		# Forward the message to Exception so that e.args is populated
		# instead of being left empty.
		super(MyError, self).__init__(message)
		self.message = message

	def __str__(self):
		# str(error) yields the quoted (repr) form of the message
		return repr(self.message)

class FileTableEntry:
	"""One fixed-size (32-byte) record of the package entry table."""

	# six big-endian 32-bit unsigned fields followed by 8 bytes that are skipped
	entry_fmt = '>IIIIII8x'

	def __init__(self):
		# Give every attribute a defined value so that an entry can be
		# inspected safely even before read() has been called.
		self.type = 0
		self.unk1 = 0
		self.flags1 = 0
		self.flags2 = 0
		self.offset = 0
		self.size = 0
		self.key_index = 0
		self.name = None

	def read(self, f):
		"""Fill this entry from the next record of the binary stream f.

		Raises struct.error when fewer bytes than one full record are
		available.
		"""
		raw = f.read(struct.calcsize(self.entry_fmt))
		self.type, self.unk1, self.flags1, self.flags2, self.offset, self.size = struct.unpack(self.entry_fmt, raw)
		# the key index is stored in bits 12..15 of flags2
		self.key_index = (self.flags2 & 0xF000) >> 12
		# the name is not part of the record; callers assign it afterwards
		self.name = None

try:
	with open(pkg_file_path, 'rb') as pkg_file:
		magic = read_string(pkg_file, 4)
		if magic != PKG_MAGIC:
			raise MyError('invalid file magic')

		type = read_uint32_be(pkg_file)
		is_retail = (type & FILE_TYPE_FLAGS_RETAIL) != 0

		pkg_file.seek(0x10) # FIXME: or maybe uint16 at 0x16???
		num_table_entries = read_uint32_be(pkg_file)

		pkg_file.seek(0x14)
		num_system_entries = read_uint16_be(pkg_file)

		pkg_file.seek(0x18)
		file_table_offset = read_uint32_be(pkg_file)

		pkg_file.seek(0x1C)
		main_entries_data_size = read_uint32_be(pkg_file)

		pkg_file.seek(0x24)
		body_offset = read_uint32_be(pkg_file)
		pkg_file.seek(0x2C)
		body_size = read_uint32_be(pkg_file)

		pkg_file.seek(0x414)
		content_offset = read_uint32_be(pkg_file)
		pkg_file.seek(0x41C)
		content_size = read_uint32_be(pkg_file)

		pkg_file.seek(0x40)
		content_id = read_cstring(pkg_file)
		if len(content_id) != CONTENT_ID_SIZE:
			raise MyError('invalid content id')

		pkg_file.seek(0x100)
		main_entries1_digest = pkg_file.read(SHA256_HASH_SIZE)
		main_entries2_digest = pkg_file.read(SHA256_HASH_SIZE)
		digest_table_digest = pkg_file.read(SHA256_HASH_SIZE)
		body_digest = pkg_file.read(SHA256_HASH_SIZE)

		pkg_file.seek(0x440)
		content_digest = pkg_file.read(SHA256_HASH_SIZE)
		content_one_block_digest = pkg_file.read(SHA256_HASH_SIZE)

		table_entries = []
		table_entries_map = {}
		pkg_file.seek(file_table_offset)
		for i in xrange(num_table_entries):
			entry = FileTableEntry()
			entry.read(pkg_file)
			table_entries_map[entry.type] = len(table_entries)
			table_entries.append(entry)

		entry_names = None
		entry_digests = None
		for i in xrange(num_table_entries):
			entry = table_entries[i]
			if entry.type == ENTRY_TYPE_NAME_TABLE:
				pkg_file.seek(entry.offset)
				data = pkg_file.read(entry.size)
				if data and len(data) > 0:
					data = StringIO(data)
					entry_names = []
					c = data.read(1)
					if ord(c) == 0:
						while True:
							name = read_cstring(data)
							if not name:
								break
							entry_names.append(name)
					else:
						raise MyError('weird name table format')
				break
		entry_name_index = 0
		for i in xrange(num_table_entries):
			entry = table_entries[i]
			type, index = (entry.type >> 8) & 0xFF, entry.type & 0xFF
			if type == ENTRY_TYPE_FILE1 or type == ENTRY_TYPE_FILE2:
				if entry_name_index < len(entry_names):
					entry.name = entry_names[entry_name_index]
					entry_name_index += 1
				else:
					raise MyError('entry name index out of bounds')
			elif entry.type in ENTRY_TABLE_MAP:
				entry.name = ENTRY_TABLE_MAP[entry.type]
			if entry.type == ENTRY_TYPE_DIGEST_TABLE and entry_digests is None:
				pkg_file.seek(entry.offset)
				entry_digests = pkg_file.read(entry.size)

		data = ''
		for entry_type in [ENTRY_TYPE_0x800, ENTRY_TYPE_0x200, ENTRY_TYPE_0x180, ENTRY_TYPE_META_TABLE, ENTRY_TYPE_DIGEST_TABLE]:
			entry = table_entries[table_entries_map[entry_type]]
			pkg_file.seek(entry.offset)
			data += pkg_file.read(entry.size)
		computed_main_entries1_digest = sha256(data)

		data = ''
		for entry_type in [ENTRY_TYPE_0x800, ENTRY_TYPE_0x200, ENTRY_TYPE_0x180, ENTRY_TYPE_META_TABLE]:
			entry = table_entries[table_entries_map[entry_type]]
			pkg_file.seek(entry.offset)
			size = entry.size if entry_type != ENTRY_TYPE_META_TABLE else num_system_entries * META_ENTRY_SIZE
			data += pkg_file.read(size)
		computed_main_entries2_digest = sha256(data)

		entry = table_entries[table_entries_map[ENTRY_TYPE_DIGEST_TABLE]]
		pkg_file.seek(entry.offset)
		data = pkg_file.read(entry.size)
		computed_digest_table_digest = sha256(data)

		pkg_file.seek(body_offset)
		body = pkg_file.read(body_size)
		computed_body_digest = sha256(body)

		computed_entry_digests = '\x00' * SHA256_HASH_SIZE
		for i in xrange(num_table_entries):
			entry = table_entries[i]
			if entry.type == ENTRY_TYPE_DIGEST_TABLE:
				continue
			pkg_file.seek(entry.offset)
			data = pkg_file.read(entry.size)
			computed_entry_digests += sha256(data)

		for i in xrange(num_table_entries):
			entry = table_entries[i]
			name = entry.name if entry.name is not None else 'entry_{0:03}.bin'.format(i)
			file_path = os.path.join(output_dir, name)
			file_dir = os.path.split(file_path)[0]
			if not os.path.exists(file_dir):
				os.makedirs(file_dir)
			with open(file_path, 'wb') as entry_file:
				pkg_file.seek(entry.offset)
				data = pkg_file.read(entry.size)
				entry_file.write(data)

		block_size = 0x10000
		num_blocks = 1 + int((content_size - 1) / block_size) if content_size > 0 else 0

		pkg_file.seek(content_offset)
		data = pkg_file.read(block_size)
		computed_content_one_block_digest = sha256(data)

		hash_context = hashlib.sha256()
		pkg_file.seek(content_offset)
		bytes_left = content_size
		for i in xrange(num_blocks):
			current_size = block_size if bytes_left > block_size else bytes_left
			data = pkg_file.read(current_size)
			hash_context.update(data)
			bytes_left -= block_size
		computed_content_digest = hash_context.digest()

		is_digests_valid = computed_main_entries1_digest == main_entries1_digest
		is_digests_valid = is_digests_valid and computed_main_entries2_digest == main_entries2_digest
		is_digests_valid = is_digests_valid and computed_digest_table_digest == digest_table_digest
		is_digests_valid = is_digests_valid and computed_body_digest == body_digest
		is_digests_valid = is_digests_valid and computed_entry_digests == entry_digests
		is_digests_valid = is_digests_valid and computed_content_digest == content_digest
		is_digests_valid = is_digests_valid and computed_content_one_block_digest == content_one_block_digest

        	print 'File information:'
		print '             Magic: 0x{0}'.format(magic.encode('hex').upper())
		print '              Type: 0x{0:08X}'.format(type), '(retail)' if is_retail else ''
		print '        Content ID: {0}'.format(content_id)
		print ' Num table entries: {0}'.format(num_table_entries)
		print 'Entry table offset: 0x{0:08X}'.format(file_table_offset)
		print '     Digest status: {0}'.format('OK' if is_digests_valid else 'FAIL')
		print

		if num_table_entries > 0:
			print 'Table entries:'
			for i in xrange(num_table_entries):
				entry = table_entries[i]
				print '  Entry #{0:03}:'.format(i)
				print '         Type: 0x{0:08X}'.format(entry.type)
				print '         Unk1: 0x{0:08X}'.format(entry.unk1)
				if entry.name is not None:
					print '         Name: {0}'.format(entry.name)
				print '       Offset: 0x{0:08X}'.format(entry.offset)
				print '         Size: 0x{0:08X}'.format(entry.size)
				print '      Flags 1: 0x{0:08X}'.format(entry.flags1)
				print '      Flags 2: 0x{0:08X}'.format(entry.flags2)
				print '    Key index: {0}'.format('N/A' if entry.key_index == 0 else entry.key_index)
			print
		
except IOError:
	print 'error: i/o error during processing'
except MyError as e:
	print 'error: {0}', e.message
except:
	print 'error: unexpected error:', sys.exc_info()[0]
	traceback.print_exc(file=sys.stdout)

Table Entry Hashes (SHA-256)

The first entry in the index table points to a block of hashes. This example uses the Amazon Instant Video package, UP2064-CUSA00130_00-AIV00000000000US.pkg.

0x2A80  00 00 00 01 00 00 00 00 40 00 00 00 00 00 00 00  ........@....... First Entry in Index Table.
0x2A90  00 00 2C A0 00 00 02 20 00 00 00 00 00 00 00 00  .., ... ........
0x2AA0  00 00 00 10 00 00 00 00 60 00 00 00 00 00 00 00  ........`.......
0x2AB0  00 00 20 00 00 00 08 00 00 00 00 00 00 00 00 00  .. .............
0x2AC0  00 00 00 20 00 00 00 00 E0 00 00 00 00 00 30 00  ... ....à.....0.
0x2AD0  00 00 28 00 00 00 01 00 00 00 00 00 00 00 00 00  ..(.............
...

Offset: 0x00002CA0 Length: 0x00000220

0x2CA0  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
0x2CB0  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
0x2CC0  B4 34 DD 9C B3 C7 91 96 EC F7 D1 F6 8F 8A 2D 18  ´4Ýœ³Ç‘–ì÷Ñö.Š-.
0x2CD0  8D 07 8F 2D 33 E9 09 6B 1D 22 B5 4E 7A F2 6D 6A  ...-3é.k."µNzòmj
0x2CE0  F0 F5 9E 95 F4 74 13 FE 9F 35 DB 44 57 76 DE 49  ðõž•ôt.þŸ5ÛDWvÞI
0x2CF0  90 B1 68 20 97 8F 70 79 9D 62 95 CD 97 67 5D B0  .±h —.py.b•Í—g]°
0x2D00  1D 6E EE E7 67 3D 7E B4 2F 78 F1 26 2C EE EC 7A  .nîçg=~´/xñ&,îìz
0x2D10  10 40 90 BA FD 0F F9 AF BE ED F1 BC DE 84 30 55  .@.ºý.ù¯¾íñ¼Þ„0U
0x2D20  57 65 D8 7B DD 85 5E D0 73 1B 78 4D A6 EE 00 CF  WeØ{Ý…^Ðs.xM¦î.Ï
0x2D30  A1 0C 3F C4 03 E0 19 5A 0B 36 E1 64 33 7A D1 C6  ¡.?Ä.à.Z.6ád3zÑÆ
0x2D40  1A 4E E9 FA 4F DD AD F4 63 FF 73 8F 9F 24 6F 0E  .NéúOÝ.ôcÿs.Ÿ$o.
0x2D50  DF 22 EB 3D 43 F1 A3 7D C6 D0 BD 97 49 03 EC C2  ß"ë=Cñ£}ÆÐ½—I.ìÂ
0x2D60  DB 04 17 61 81 6A 14 9B 0F A3 B6 D7 6D AA 48 5A  Û..a.j.›.£¶×mªHZ
0x2D70  1F 3E 95 6B 63 BD AE B2 A2 E0 AE 44 8D D0 05 EA  .>•kc½®²¢à®D.Ð.ê
0x2D80  93 BB 8F 3E 60 72 F8 0C BD BA DB 0E 4D 01 AA AA  “».>`rø.½ºÛ.M.ªª
0x2D90  65 C0 97 E3 89 18 BB A2 17 6E 49 EE 3A 36 CA 91  eÀ—ã‰.»¢.nIî:6Ê‘
0x2DA0  5B EE 4F 1B 1B 7F 52 17 04 99 DD 8C 19 3A 31 BB  [îO...R..™ÝŒ.:1»
0x2DB0  79 9D F4 70 38 D5 F6 DD FF AA 76 5E 10 F2 CC 8F  y.ôp8ÕöÝÿªv^.òÌ.
0x2DC0  0A D9 DC 1C BA 98 EB B3 4A 74 02 E9 F1 0A 0A 90  .ÙÜ.º˜ë³Jt.éñ...
0x2DD0  69 AC D0 29 9F 93 DF 45 80 35 6E FB AF D6 B1 A5  i¬Ð)Ÿ“ßE€5nû¯Ö±¥
0x2DE0  C6 13 74 C9 51 F7 BA A5 CF 0D DE 13 E3 BB 02 0D  Æ.tÉQ÷º¥Ï.Þ.ã»..
0x2DF0  06 6E 44 64 FF 2A CA 37 B0 20 4C 03 44 CA 5E C9  .nDdÿ*Ê7° L.DÊ^É
0x2E00  B4 D0 03 6B 54 4A 66 ED C7 32 CB D2 E0 34 CF 5F  ´Ð.kTJfíÇ2ËÒà4Ï_
0x2E10  5B 1F 46 B5 81 72 09 D3 33 B3 3E 5E FC 01 6B 11  [.Fµ.r.Ó3³>^ü.k.
0x2E20  9A DF 99 EE A2 2B 5E E2 72 B9 32 02 6B B7 E8 D1  šß™î¢+^âr¹2.k·èÑ
0x2E30  5A 9D B8 A9 97 17 47 4F 11 75 FA 41 6E 79 7A 1B  Z.¸©—.GO.uúAnyz.
0x2E40  94 A5 62 30 EA E0 99 89 3D BB 34 5D 0B F5 E3 17  ”¥b0êà™‰=»4].õã.
0x2E50  BE 2C EE 7B D5 EA 8F 05 FB 0E 07 A2 40 FF 7A 59  ¾,î{Õê..û..¢@ÿzY
0x2E60  6B FE F8 0B 1E 61 85 83 18 9A 53 3A F0 91 46 B7  kþø..a…ƒ.šS:ð‘F·
0x2E70  86 83 38 B8 C1 3E E8 74 C5 4F 4E E6 B6 28 7F 52  †ƒ8¸Á>ètÅONæ¶(.R
0x2E80  55 FE CE EA F4 9E 98 2A BC A5 C4 21 D6 44 17 C9  UþÎêôž˜*¼¥Ä!ÖD.É
0x2E90  76 D9 1C 02 FD 75 BB 37 C3 96 1A C3 1C 3E 5B 5F  vÙ..ýu»7Ã–.Ã.>[_
0x2EA0  2B 37 CA 02 AB E2 B7 C6 FB 74 23 B9 A6 C2 C6 0B  +7Ê.«â·Æût#¹¦ÂÆ.
0x2EB0  70 6F 79 CB AE 80 D9 B9 62 1A D6 69 F6 47 FB F2  poyË®€Ù¹b.ÖiöGûò

The first hash is blank (all zeros) because the table of hashes cannot contain a hash of itself.
The remaining hashes correspond, in order, to the remaining Index Table entries.

i.e. the data of the second entry in the Index Table should have a SHA-256 digest matching the following:

0xB434DD9CB3C79196ECF7D1F68F8A2D188D078F2D33E9096B1D22B54E7AF26D6A