- Merkle–Damgård based hash function
- Vulnerable to Length extension attack
SHA-1 Collision Strings
URL decode strings.
PDF Collisions
Take two PDFs and generate a new version of each so that both outputs share the same SHA-1 hash (a SHA-1 collision) while still differing in content.
#Install Dependencies
yay -S ghostscript turbojpeg
#Download Github repo
git clone https://github.com/nneonneo/sha1collider
cd sha1collider
#Download sample pdfs
#Ensure they are different files
[gen0@gen0 sha1collider]$ sha256sum dummy.pdf pdf-sample.pdf
3df79d34abbca99308e79cb94461c1893582604d68329a41fd4bec1885e6adb4 dummy.pdf
60bdd13ea4827b8de375c79dc3ff847f83b55bd73b6461523fdf8f843b5a0d5b pdf-sample.pdf
[gen0@gen0 sha1collider]$ sha1sum dummy.pdf pdf-sample.pdf
90ffd2359008d82298821d16b21778c5c39aec36 dummy.pdf
fc80d59877b4ae21911591b53664b2da1324cf25 pdf-sample.pdf
Generating and testing PDFs:
>>> python3 collide.py dummy.pdf pdf-sample.pdf
[14:39:08] INFO: rendering file 1...
GPL Ghostscript 9.56.1 (2022-04-04)
Copyright (C) 2022 Artifex Software, Inc. All rights reserved.
This software is supplied under the GNU AGPLv3 and comes with NO WARRANTY:
see the file COPYING for details.
Processing pages 1 through 1.
Page 1
>>> sha1sum *.pdf
90ffd2359008d82298821d16b21778c5c39aec36 dummy.pdf
ed880ae0030504a51ab39eb36b533383d374cd41 out-dummy.pdf
ed880ae0030504a51ab39eb36b533383d374cd41 out-pdf-sample.pdf
fc80d59877b4ae21911591b53664b2da1324cf25 pdf-sample.pdf
>>> sha256sum *.pdf
3df79d34abbca99308e79cb94461c1893582604d68329a41fd4bec1885e6adb4 dummy.pdf
675eb780b254d32bf2cad40d9a9702787048b2fbe33228ec4311f812fa472149 out-dummy.pdf
b5b7fdc2b6d9b7f9e7907a6b029cb3246d381b0bc7487b1ec282c5c7557e0fef out-pdf-sample.pdf
60bdd13ea4827b8de375c79dc3ff847f83b55bd73b6461523fdf8f843b5a0d5b pdf-sample.pdf
from cryptopals_lib import *
class SHA(object):
    """Pure-Python SHA-1 built on the Merkle-Damgard construction.

    The running state lives in ``self.buffers`` (five 32-bit words) and is
    updated in place by every processed 64-byte chunk. The state is never
    reset, so use a fresh instance for each independent message; calling
    ``hash`` twice on one instance continues from the previous state.
    """

    def __init__(self):
        # Initial chaining values h0..h4 and the per-stage round constants.
        self.buffers = [0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0]
        self.round_constants = [0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6]

    @staticmethod
    def _rotate_left(value, count):
        """Rotate the low 32 bits of ``value`` left by ``count`` positions."""
        value &= 0xFFFFFFFF
        return ((value << count) | (value >> (32 - count))) & 0xFFFFFFFF

    def _set_message(self, message):
        """Return ``message`` as a padded bytearray ready for chunking.

        Padding is: one 0x80 byte, zero bytes until the length is 56 mod 64,
        then the original length in bits as an 8-byte big-endian integer.
        A ``str`` is encoded as UTF-8 first. The input is copied, never
        modified.
        """
        if isinstance(message, str):
            message = message.encode("utf-8")

        # Copy into a mutable buffer so the caller's object is left untouched
        byte_message = bytearray(message)

        # Message length in bits, limited to a 64-bit integer
        input_length_data = (len(byte_message) << 3) & 0xFFFFFFFFFFFFFFFF

        # Append 0x80 to the end of the message as the end-of-message byte
        byte_message.append(0x80)

        # Zero-pad so the total is a multiple of 64 bytes once the 8-byte
        # length field is added
        byte_message.extend(b"\x00" * ((56 - len(byte_message)) % 64))

        # Append the length data to the message
        byte_message += input_length_data.to_bytes(8, byteorder="big")
        return byte_message

    def _hash_message_chunk(self, chunk):
        """Fold one 64-byte ``chunk`` into ``self.buffers``.

        Raises:
            ValueError: if ``chunk`` is not exactly 64 bytes long.
        """
        if len(chunk) != 64:
            raise ValueError("SHA-1 chunks must be exactly 64 bytes long")

        rotate = self._rotate_left

        # The first 16 schedule words are the chunk read as big-endian 32-bit ints
        words = [int.from_bytes(chunk[offset:offset + 4], "big")
                 for offset in range(0, 64, 4)]

        # Expand the schedule to 80 words
        for index in range(16, 80):
            words.append(rotate(words[index - 3] ^ words[index - 8]
                                ^ words[index - 14] ^ words[index - 16], 1))

        a, b, c, d, e = self.buffers
        for index, word in enumerate(words):
            if index < 20:
                # Function F: (b & c) ^ (~b & d)
                mixed = (b & c) ^ (~b & d)
            elif index < 40:
                # Function G: b ^ c ^ d
                mixed = b ^ c ^ d
            elif index < 60:
                # Function H: (b & c) ^ (b & d) ^ (c & d)
                mixed = (b & c) ^ (b & d) ^ (c & d)
            else:
                # Function I: b ^ c ^ d
                mixed = b ^ c ^ d

            # Add the variables modulo 2**32; masking also discards the sign
            # bits that Python's unbounded ~ operator introduces
            constant = self.round_constants[index // 20]
            new_a = (rotate(a, 5) + mixed + e + constant + word) & 0xFFFFFFFF

            # Shift the working registers along for the next round
            a, b, c, d, e = new_a, a, rotate(b, 30), c, d

        # Update the internal buffers with the new data
        self.buffers = [(old + new) & 0xFFFFFFFF
                        for old, new in zip(self.buffers, (a, b, c, d, e))]

    def hash(self, message):
        """Process ``message`` and return the resulting 20-byte state as bytes.

        Hashing starts from the current ``self.buffers`` and leaves the
        updated state in place, so the result is the SHA-1 digest of
        ``message`` only when called on a fresh instance.
        """
        # Set up the message with padding and length data
        byte_message = self._set_message(message)

        # Operate on each of the 64-byte chunks
        for offset in range(0, len(byte_message), 64):
            self._hash_message_chunk(byte_message[offset:offset + 64])

        # Convert the integers to a byte string
        return b"".join(word.to_bytes(4, byteorder="big") for word in self.buffers)

    def hash_digest(self, message):
        """Return the result of ``hash(message)`` as a lowercase hex string."""
        return self.hash(message).hex()
if __name__ == '__main__':
    # Quick manual check: print the digest of a few well-known inputs.
    # SHA never resets self.buffers after construction, so each message is
    # hashed with its own fresh instance.
    for sample in (b"", b"abc", b"The quick brown fox jumps over the lazy dog"):
        testsha = SHA()
        print(sample, testsha.hash_digest(sample))