All examples are in Python 3.
The bytes type in Python is immutable and stores a sequence of values ranging from 0-255 (8 bits each). You can get the value of a single byte by indexing it like an array, but the values cannot be modified.
# bytes(n) builds an immutable sequence of n zero-valued bytes
empty_bytes = bytes(4)
# Show the type first, then the four null bytes themselves
print(type(empty_bytes), empty_bytes, sep="\n")
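To create a mutable object you need to use the bytearray type. A bytearray supports the usual mutable-sequence operations such as append, extend, insert, pop, remove, reverse, and deleting items or slices with del. (It has no push or sort method; use append to add a byte and the built-in sorted() to get a sorted list of its values.)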
# Build a mutable bytearray from an immutable bytes literal
mutable_bytes = bytearray(b'\x00\x0F')

# Unlike bytes, a bytearray supports in-place changes
mutable_bytes[0] = 0xFF     # overwrite the first byte with 255
mutable_bytes.append(0xFF)  # grow the sequence by one byte (255)
print(mutable_bytes)

# Freeze the result again by converting it back to bytes
immutable_bytes = bytes(mutable_bytes)
print(immutable_bytes)
with open("test_file.dat", mode="rb") as binary_file:
    # Pull the entire file into memory with a single read() call
    data = binary_file.read()
    print(data)

    # The read above left the position at end-of-file, so rewind to
    # offset 0 before reading just the first two bytes
    binary_file.seek(0)
    couple_bytes = binary_file.read(2)
    print(couple_bytes)
i = 16

# One byte is enough to hold the integer 16
single_byte = int.to_bytes(i, 1, 'big', signed=True)
print(single_byte)

# The same integer padded out to four bytes, most significant byte first
four_bytes = int.to_bytes(i, 4, 'big', signed=True)
print(four_bytes)

# Little endian stores the least significant byte first, so the order flips
print(int.to_bytes(i, 4, 'little', signed=True))

# bytes() also accepts a list of integers, each in the range 0-255
bytes_from_list = bytes([0xFF, 0xFE, 0xFD, 0xFC])
print(bytes_from_list)

# Parse a string of base 2 digits into an integer small enough for one byte
one_byte = int('11110000', base=2)
print(one_byte)
# Interpret bytes as an integer. The result is unsigned unless signed=True.
some_bytes = b'\x00\xF0'
i = int.from_bytes(some_bytes, 'big')
print(i)

# Interpret the bytes as a signed integer instead
i = int.from_bytes(b'\x00\x0F', 'big', signed=True)
print(i)

# A list of integers 0-255 is accepted in place of a bytes object
i = int.from_bytes([0xFF, 0x00, 0x00, 0x00], 'big')
print(i)
# Binary to Text
binary_data = b'I am text.'
text = binary_data.decode(encoding='utf-8')
print(text)

binary_data = bytes((65, 66, 67))  # ASCII codes for A, B, C
text = binary_data.decode(encoding='utf-8')
print(text)

# Text to Binary
message = "Hello"  # str
binary_message = message.encode(encoding='utf-8')
print(type(binary_message))  # bytes

# Python ships with many codecs for different languages and
# transformations; even the Caesar cipher (as ROT13) is built in
import codecs
cipher_text = codecs.encode(message, encoding='rot_13')
print(cipher_text)
# Encode binary data to a base 64 string
binary_data = b'\x00\xFF\x00\xFF'

# Option 1: the generic codecs module with its 'base64' codec
import codecs
base64_data = codecs.encode(binary_data, encoding='base64')
print(base64_data)

# Option 2: the binascii module
import binascii
base64_data = binascii.b2a_base64(binary_data)
print(base64_data)

# Either way base64_data is still a bytes object holding ASCII characters;
# decode it when a regular str is required
print(base64_data.decode(encoding='utf-8'))

# Getting the raw bytes back works the same way with either module
print(codecs.decode(base64_data, encoding='base64'))
print(binascii.a2b_base64(base64_data))
# unhexlify turns a string of hex digits into the bytes they describe
deadbeef = binascii.unhexlify('DEADBEEF')
print(deadbeef)

# hexlify goes the other way: raw bytes in, hex digits out
hex_data = binascii.hexlify(bytes([0x00, 0xFF]))  # the two byte values 0 and 255
# The hex digits come back as a bytes object, so decode them
# whenever a regular str is needed
text_string = hex_data.decode(encoding='utf-8')  # the str "00ff"
print(text_string)
Format strings can be helpful to visualize or output byte values. Format strings require an integer value so the byte will have to be converted to an integer first.
a_byte = b'\xff'  # 255
i = ord(a_byte)  # Get the integer value of the single byte

# Descriptive names are used on purpose: assigning to bin, hex or oct would
# shadow the built-in functions of the same name for the rest of the module.
binary_text = "{0:b}".format(i)  # binary: 11111111
hex_text = "{0:x}".format(i)  # hexadecimal: ff
octal_text = "{0:o}".format(i)  # octal: 377
print(binary_text)
print(hex_text)
print(octal_text)
# Some bytes to play with, written as binary literals
byte1 = 0b11110000  # 240
byte2 = 0b00001111  # 15
byte3 = 0b01010101  # 85

# Ones complement flips every bit. Python ints are not limited to
# eight bits, so this prints -241 rather than 15.
print(~byte1)

# AND keeps only the bits set in both operands
print(byte1 & byte2)

# OR keeps the bits set in either operand
print(byte1 | byte2)

# XOR keeps the bits set in exactly one operand
print(byte1 ^ byte3)

# Shifting right by three drops the three right-most bits
print(byte2 >> 3)

# Shifting left by one appends a 0 bit on the right side
print(byte2 << 1)

# Test a single bit by AND-ing with a mask: the result is non-zero
# only when that bit is set
bit_mask = 0b00000001  # the lowest-order bit
print(bit_mask & byte1)  # Is the bit set in byte1?
print(bit_mask & byte2)  # Is the bit set in byte2?
Packing and unpacking requires a string that defines how the binary data is structured. It needs to know which bytes represent values. It needs to know whether the entire set of bytes represents characters or if it is a sequence of 4-byte integers. It can be structured in any number of ways. The format strings can be simple or complex. In this example I am packing a single four-byte integer followed by two characters. In the format string, the letter i stands for an integer and the letter c stands for a single character.
import struct

# Packing values to bytes.
# The format string "icc" describes the layout: a single four-byte integer
# followed by two characters. The values are supplied in that same order.
values = (8499000, b'A', b'Z')
binary_data = struct.pack("icc", *values)
print(binary_data)

# Unpacking with the same format string yields a tuple in the original order
tuple_of_data = struct.unpack("icc", binary_data)
print(tuple_of_data)

# The struct documentation covers format strings and endianness in depth:
# https://docs.python.org/3/library/struct.html
You might need to know what byte order your system uses. Byte order refers to big endian or little endian. The sys module can provide that value.
# Report the byte order of the system this interpreter runs on
import sys
native_order = sys.byteorder
print("Native byteorder: ", native_order)
# diff.py - Do two files match?
# Exercise: Rewrite this code to compare the files part at a time so it
# will not run out of RAM with large files.
import sys

# Both paths come from the command line; each file is loaded whole
with open(sys.argv[1], 'rb') as file1, open(sys.argv[2], 'rb') as file2:
    data1 = file1.read()
    data2 = file2.read()

# Byte-for-byte equality of the two buffers decides the verdict
if data1 == data2:
    print("Files match.")
else:
    print("Files do not match.")
#is_jpeg.py - Does the file have a JPEG binary signature?
import sys
import binascii

# A JPEG stream opens with the start-of-image marker FF D8, immediately
# followed by a second marker. Note the first entry is FFD8FFDB, not
# FFD8FFD8: two start-of-image markers in a row do not begin a valid JPEG.
jpeg_signatures = [
    binascii.unhexlify(b'FFD8FFDB'),  # raw JPEG (quantization table follows)
    binascii.unhexlify(b'FFD8FFE0'),  # JFIF (APP0 segment follows)
    binascii.unhexlify(b'FFD8FFE1'),  # Exif (APP1 segment follows)
]

# Only the first four bytes of the file named on the command line are needed
with open(sys.argv[1], 'rb') as file:
    first_four_bytes = file.read(4)

if first_four_bytes in jpeg_signatures:
    print("JPEG detected.")
else:
    print("File does not look like a JPEG.")
# read_boot_sector.py - Inspect the first 512 bytes of a file
import sys

chunk_size = 512  # number of bytes to read from the start of the file

# The with-statement closes the handle even if read() raises, so the
# file is never left open.
with open(sys.argv[1], 'rb') as in_file:  # Provide a path to disk or ISO image
    data = in_file.read(chunk_size)

print(data)
# find_ascii_in_binary.py - Identify ASCII characters in binary files
import sys
from functools import partial

chunk_size = 1
with open(sys.argv[1], 'rb') as in_file:
    # iter() with a sentinel keeps calling read(chunk_size) until it
    # returns b'', which signals end of file
    for data in iter(partial(in_file.read, chunk_size), b''):
        x = int.from_bytes(data, byteorder='big')
        is_upper = 65 <= x <= 90   # 'A' through 'Z'
        is_lower = 97 <= x <= 122  # 'a' through 'z'
        # Echo ASCII letters as-is and mask every other byte with a dot
        sys.stdout.write(chr(x) if (is_upper or is_lower) else '.')
# create_stego_zip_jpg.py - Hide a zip file inside a JPEG
import sys

# Each file is handled in its own with-statement so the handle is closed
# even when a read or write raises.

# Start with a jpeg file
with open(sys.argv[1], 'rb') as jpg_file:  # Path to JPEG
    jpg_data = jpg_file.read()

# And the zip file to embed in the jpeg
with open(sys.argv[2], 'rb') as zip_file:  # Path to ZIP file
    zip_data = zip_file.read()

# Combine the files: the JPEG bytes first, the ZIP bytes appended after them
with open('special_image.jpg', 'wb') as out_file:  # Output file
    out_file.write(jpg_data)
    out_file.write(zip_data)

# The resulting output file appears like a normal jpeg but can also be
# unzipped and used as an archive.