I am trying to code basic steganography algorithms for B&W images using Python PIL.
Using an example image I can successfully extract the hidden image within it, and hide other images to subsequently extract them. The problem comes with hiding text and then extracting it.
The code is as follows:
from PIL import Image
import matplotlib.pyplot as plt
import scipy.misc as sci
import numpy as np
import array
#CONVERTS IMAGE TO ARRAY OF BINARY 8-BIT NUMBER#
def getImgArray(image):
w,h = image.size
out = []
for x in range(w):
for y in range(h):
pixel = image.getpixel((y,x))
pixel = format(pixel, '08b')
out.append(pixel)
return out
def stringByteConverter(data, mode):
if (mode == "stringToByte"):
aux = map(ord,data.encode('utf8'))
aux = [format(char,'08b') for char in aux]
return aux
elif (mode == "byteToString"):
aux = [int(item,2) for item in data]
aux = "".join(map(chr, aux))
return aux
else:
print("Invalid mode. Use 'stringToByte' or 'byteToString'")
#GETS HIDDEN IMAGE AND RETURNS IT AS BYTE ARRAY REPRESENTING PIXELS#
def getHiddenImage(image):
buf = ""
width,height = image.size
img_aux = []
for x in range(width):
for y in range(height):
if(len(buf)<8):
pixel = image.getpixel((y,x))
pixel = format(pixel,'08b')
buf += pixel[-2:]
else:
img_aux.append(buf)
buf = ""
pixel = image.getpixel((y,x))
pixel = format(pixel,'08b')
buf += pixel[-2:]
return img_aux
#CONVERT ARRAY OF BYTES TO PNG IMG AND RETURNS PIL IMG OBJECT#
def saveImgArr(ImgArr, size, outputName):
pixels = np.empty(size)
iterator = 0
for i in range(size[0]):
for j in range(size[1]):
try:
pixels[i][j] = int(ImgArr[iterator],2)
iterator += 1
except IndexError:
break
aux = Image.fromarray(pixels)
aux = aux.convert("L")
aux.save(outputName+'.png', 'PNG')
return pixels
#HIDE IMAGE <src> IN OTHER IMAGE <img>#
def hideImg(src, img, output):
iterator = 0
src = src.convert("L")
srcArr = getImgArray(src)
imgArr = getImgArray(img)
for i in range(len(srcArr)):
buf = []
buf.append(srcArr[i][:2])
buf.append(srcArr[i][2:4])
buf.append(srcArr[i][4:6])
buf.append(srcArr[i][6:])
for j in range(4):
imgArr[iterator] = imgArr[iterator][:-2] + buf[j]
iterator += 1
saveImgArr(imgArr,img.size,output)
#HIDE STRING INSIDE IMG#
def hideText(img, string, outputName):
imgArr = getImgArray(img)
stringBytes = stringByteConverter(string, "stringToByte")
iterator = 0
for i in range(len(string)):
buf = []
buf.append(stringBytes[i][:2])
buf.append(stringBytes[i][2:4])
buf.append(stringBytes[i][4:6])
buf.append(stringBytes[i][6:])
for j in range(4):
imgArr[iterator] = imgArr[iterator][:-2] + '00'
imgArr[iterator] = imgArr[iterator][:-2] + buf[j]
iterator += 1
print(imgArr[:len(string)*4]) #test print
saveImgArr(imgArr,img.size,outputName)
temp = Image.open(outputName+'.png')
tempArr = getImgArray(temp)
print(tempArr[:len(string)*4]) #test print
def getHiddenText(img, msgSize):
buf = ''
width,height = img.size
output = []
counter = 0
for x in range(width):
for y in range(height):
if(counter < msgSize*4):
pixel = img.getpixel((y,x))
pixel = format(pixel,'08b')
buf += pixel[-2:]
counter += 1
output = stringByteConverter(buf, "byteToString")
return output
By printing the data array in the hideText() function I was able to obtain the following:
hideText(lena,'test',"lena_hidden_text")
['10100001', '10100011', '10100001', '10100000', '10100001', '10011110', '10100001', '10100001', '10100101', '10100011', '10100000', '10011111', '10011001', '10100011', '10011101', '10011000']
['10011110', '10100000', '10011110', '10011101', '10011110', '10011011', '10011110', '10011110', '10100011', '10100000', '10011101', '10011100', '10010101', '10100000', '10011001', '10010100']
The first vector obtained by the hideText() call is exactly as it should be, but after saving the image using saveImgArr() and reloading it using getImgArr(), the second vector is returned and it is completely different.
I can't for the life of me find the problem. It is weird since using images to extract hidden data or to hide data, both of those functions work perfectly.
I can only guess I'm dealing with the text bytes wrong in some way. Any insight would be appreciated.
One thing that looks suspicious is in saveImgArr:
The default mode for Image.fromarray to use is deduced from the datatype of the input.
In your case the data type of the input is numpy's default datatype (floats), so the Image will be constructed based on floats. I would therefore predict that the saved png image looks incorrect (just a blank image as every pixel will saturate to 1.0).
To correct this, you can either provide the correct datatype to numpy i.e. change:
to
or explicitly provide the mode to Image.fromarray by changing:
to