38

I'm looking for a Python implementation of the SHA-256 hash function. I want to use it to get a better understanding of how the SHA-256 function works, and I think Python is the ideal language for this. Pseudo-code has the limitation that I can't run/test it, to see what my modifications of the code do to the output.

2
  • 1
    However, for "production" use, it's (nearly) always better to use the library version of crypto code. from Crypto.Hash import SHA256 Commented Aug 31, 2017 at 9:47
  • @MikeW, Is Pycrytodome proven tool? Is there other alternative? Commented Jul 17, 2020 at 6:59

6 Answers 6

31

PyPy's source contains a pure-python implementation of SHA-256 here. Poking around in that directory, you'll probably also find pure-python implementations of other standard hashes.

Sign up to request clarification or add additional context in comments.

Comments

7
initial_hash_values=[
'6a09e667','bb67ae85','3c6ef372','a54ff53a',
'510e527f','9b05688c','1f83d9ab','5be0cd19'
]

sha_256_constants=[
'428a2f98','71374491','b5c0fbcf','e9b5dba5',
'3956c25b','59f111f1','923f82a4','ab1c5ed5',
'd807aa98','12835b01','243185be','550c7dc3',
'72be5d74','80deb1fe','9bdc06a7','c19bf174',
'e49b69c1','efbe4786','0fc19dc6','240ca1cc',
'2de92c6f','4a7484aa','5cb0a9dc','76f988da',
'983e5152','a831c66d','b00327c8','bf597fc7',
'c6e00bf3','d5a79147','06ca6351','14292967',
'27b70a85','2e1b2138','4d2c6dfc','53380d13',
'650a7354','766a0abb','81c2c92e','92722c85',
'a2bfe8a1','a81a664b','c24b8b70','c76c51a3',
'd192e819','d6990624','f40e3585','106aa070',
'19a4c116','1e376c08','2748774c','34b0bcb5',
'391c0cb3','4ed8aa4a','5b9cca4f','682e6ff3',
'748f82ee','78a5636f','84c87814','8cc70208',
'90befffa','a4506ceb','bef9a3f7','c67178f2'
]

def bin_return(dec):
    return(str(format(dec,'b')))

def bin_8bit(dec):
    return(str(format(dec,'08b')))

def bin_32bit(dec):
    return(str(format(dec,'032b')))

def bin_64bit(dec):
    return(str(format(dec,'064b')))

def hex_return(dec):
    return(str(format(dec,'x')))

def dec_return_bin(bin_string):
    return(int(bin_string,2))

def dec_return_hex(hex_string):
    return(int(hex_string,16))

def L_P(SET,n):
    to_return=[]
    j=0
    k=n
    while k<len(SET)+1:
        to_return.append(SET[j:k])
        j=k
        k+=n 
    return(to_return)

def s_l(bit_string):
    bit_list=[]
    for i in range(len(bit_string)):
        bit_list.append(bit_string[i])
    return(bit_list)

def l_s(bit_list):
    bit_string=''
    for i in range(len(bit_list)):
        bit_string+=bit_list[i]
    return(bit_string)

def rotate_right(bit_string,n):
    bit_list = s_l(bit_string)
    count=0
    while count <= n-1:
        list_main=list(bit_list)
        var_0=list_main.pop(-1)
        list_main=list([var_0]+list_main)
        bit_list=list(list_main)
        count+=1
    return(l_s(list_main))

def shift_right(bit_string,n):
    bit_list=s_l(bit_string)
    count=0
    while count <= n-1:
        bit_list.pop(-1)
        count+=1
    front_append=['0']*n
    return(l_s(front_append+bit_list))

def mod_32_addition(input_set):
    value=0
    for i in range(len(input_set)):
        value+=input_set[i]
    mod_32 = 4294967296
    return(value%mod_32)

def xor_2str(bit_string_1,bit_string_2):
    xor_list=[]
    for i in range(len(bit_string_1)):
        if bit_string_1[i]=='0' and bit_string_2[i]=='0':
            xor_list.append('0')
        if bit_string_1[i]=='1' and bit_string_2[i]=='1':
            xor_list.append('0')
        if bit_string_1[i]=='0' and bit_string_2[i]=='1':
            xor_list.append('1')
        if bit_string_1[i]=='1' and bit_string_2[i]=='0':
            xor_list.append('1')
    return(l_s(xor_list))

def and_2str(bit_string_1,bit_string_2):
    and_list=[]
    for i in range(len(bit_string_1)):
        if bit_string_1[i]=='1' and bit_string_2[i]=='1':
            and_list.append('1')
        else:
            and_list.append('0')
            
    return(l_s(and_list))

def or_2str(bit_string_1,bit_string_2):
    or_list=[]
    for i in range(len(bit_string_1)):
        if bit_string_1[i]=='0' and bit_string_2[i]=='0':
            or_list.append('0')
        else:
            or_list.append('1')
    return(l_s(or_list))

def not_str(bit_string):
    not_list=[]
    for i in range(len(bit_string)):
        if bit_string[i]=='0':
            not_list.append('1')
        else:
            not_list.append('0')
    return(l_s(not_list))

'''
SHA-256 Specific Functions:
'''

def Ch(x,y,z):
    return(xor_2str(and_2str(x,y),and_2str(not_str(x),z)))

def Maj(x,y,z):
    return(xor_2str(xor_2str(and_2str(x,y),and_2str(x,z)),and_2str(y,z)))

def e_0(x):
    return(xor_2str(xor_2str(rotate_right(x,2),rotate_right(x,13)),rotate_right(x,22)))

def e_1(x):
    return(xor_2str(xor_2str(rotate_right(x,6),rotate_right(x,11)),rotate_right(x,25)))

def s_0(x):
    return(xor_2str(xor_2str(rotate_right(x,7),rotate_right(x,18)),shift_right(x,3)))

def s_1(x):
    return(xor_2str(xor_2str(rotate_right(x,17),rotate_right(x,19)),shift_right(x,10)))

def message_pad(bit_list):
    pad_one = bit_list + '1'
    pad_len = len(pad_one)
    k=0
    while ((pad_len+k)-448)%512 != 0:
        k+=1
    back_append_0 = '0'*k
    back_append_1 = bin_64bit(len(bit_list))
    return(pad_one+back_append_0+back_append_1)
    
def message_bit_return(string_input):
    bit_list=[]
    for i in range(len(string_input)):
        bit_list.append(bin_8bit(ord(string_input[i])))
    return(l_s(bit_list))

def message_pre_pro(input_string):
    bit_main = message_bit_return(input_string)
    return(message_pad(bit_main))

def message_parsing(input_string):
    return(L_P(message_pre_pro(input_string),32))

def message_schedule(index,w_t):
    new_word = bin_32bit(mod_32_addition([int(s_1(w_t[index-2]),2),int(w_t[index-7],2),int(s_0(w_t[index-15]),2),int(w_t[index-16],2)]))
    return(new_word)

'''
This example of SHA_256 works for an input string <56 characters.
'''

def sha_256(input_string):
    assert len(input_string) < 56, "This example of SHA_256 works for an input string <56 characters."
    w_t=message_parsing(input_string)
    a=bin_32bit(dec_return_hex(initial_hash_values[0]))
    b=bin_32bit(dec_return_hex(initial_hash_values[1]))
    c=bin_32bit(dec_return_hex(initial_hash_values[2]))
    d=bin_32bit(dec_return_hex(initial_hash_values[3]))
    e=bin_32bit(dec_return_hex(initial_hash_values[4]))
    f=bin_32bit(dec_return_hex(initial_hash_values[5]))
    g=bin_32bit(dec_return_hex(initial_hash_values[6]))
    h=bin_32bit(dec_return_hex(initial_hash_values[7]))
    for i in range(0,64):
        if i <= 15: 
            t_1=mod_32_addition([int(h,2),int(e_1(e),2),int(Ch(e,f,g),2),int(sha_256_constants[i],16),int(w_t[i],2)])
            t_2=mod_32_addition([int(e_0(a),2),int(Maj(a,b,c),2)])
            h=g
            g=f
            f=e
            e=mod_32_addition([int(d,2),t_1])
            d=c
            c=b
            b=a 
            a=mod_32_addition([t_1,t_2])
            a=bin_32bit(a)
            e=bin_32bit(e)
        if i > 15:
            w_t.append(message_schedule(i,w_t))
            t_1=mod_32_addition([int(h,2),int(e_1(e),2),int(Ch(e,f,g),2),int(sha_256_constants[i],16),int(w_t[i],2)])
            t_2=mod_32_addition([int(e_0(a),2),int(Maj(a,b,c),2)])
            h=g
            g=f
            f=e
            e=mod_32_addition([int(d,2),t_1])
            d=c
            c=b
            b=a 
            a=mod_32_addition([t_1,t_2])
            a=bin_32bit(a)
            e=bin_32bit(e)
    hash_0 = mod_32_addition([dec_return_hex(initial_hash_values[0]),int(a,2)])
    hash_1 = mod_32_addition([dec_return_hex(initial_hash_values[1]),int(b,2)])
    hash_2 = mod_32_addition([dec_return_hex(initial_hash_values[2]),int(c,2)])
    hash_3 = mod_32_addition([dec_return_hex(initial_hash_values[3]),int(d,2)])
    hash_4 = mod_32_addition([dec_return_hex(initial_hash_values[4]),int(e,2)])
    hash_5 = mod_32_addition([dec_return_hex(initial_hash_values[5]),int(f,2)])
    hash_6 = mod_32_addition([dec_return_hex(initial_hash_values[6]),int(g,2)])
    hash_7 = mod_32_addition([dec_return_hex(initial_hash_values[7]),int(h,2)])
    final_hash = (hex_return(hash_0),
                  hex_return(hash_1),
                  hex_return(hash_2),
                  hex_return(hash_3),
                  hex_return(hash_4),
                  hex_return(hash_5),
                  hex_return(hash_6),
                  hex_return(hash_7))
    return(final_hash)

9 Comments

That code looks like it has some license attached that would prevent you from posting it here without attribution.
@MattTimmermans I wrote the script. What is it about the code that makes you think that? The magic happens in int() and format()
Is this tested? I got a different result when calling your print(str(sha_256("hi"))) than when I ran echo "hi" | sha256sum
Ok, I fixed it and summited an edit with the new code. I hoped I helped! (:
This code is not fixed yet! This won't work if the message is greater than 448 bits. Try print(str(sha_256("hello world hello world hello world hello world hello wo"))). The correct value should be 5e1440c967b41e15a9c816b6f8c93195deaaceebc3b7a50293c61dfa7725f221 . To fix this 512 chunks algorithm must be implemented.
|
5

Some time ago I was also studying SHA-256 and created pure-python class that implements this hash. If I remember correctly, mostly I've taken algorithm from Wikipedia SHA-256 Pseudocode and partially from some open-source projects.

Algorithm doesn't import any (even standard) modules. Of cause it is much slower than hashlib's variant and only meant for studying.

If you just run the script it executes 1000 tests comparing hashlib's and my variants. Only testing function imports some modules, algorithm's class itself doesn't need any modules. Interface is same as in hashlib's sha256 class. See test() function for examples of usage.

Try it online!

class Sha256:
    ks = [
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
        0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
        0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
        0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
        0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
        0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
        0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
        0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
        0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
        0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
        0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
        0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
        0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
        0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
        0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
        0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
    ]

    hs = [
        0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
    ]

    M32 = 0xFFFFFFFF

    def __init__(self, m = None):
        self.mlen = 0
        self.buf = b''
        self.k = self.ks[:]
        self.h = self.hs[:]
        self.fin = False
        if m is not None:
            self.update(m)

    @staticmethod
    def pad(mlen):
        mdi = mlen & 0x3F
        length = (mlen << 3).to_bytes(8, 'big')
        padlen = 55 - mdi if mdi < 56 else 119 - mdi
        return b'\x80' + b'\x00' * padlen + length

    @staticmethod
    def ror(x, y):
        return ((x >> y) | (x << (32 - y))) & Sha256.M32

    @staticmethod
    def maj(x, y, z):
        return (x & y) ^ (x & z) ^ (y & z)

    @staticmethod
    def ch(x, y, z):
        return (x & y) ^ ((~x) & z)

    def compress(self, c):
        w = [0] * 64
        w[0 : 16] = [int.from_bytes(c[i : i + 4], 'big') for i in range(0, len(c), 4)]

        for i in range(16, 64):
            s0 = self.ror(w[i - 15],  7) ^ self.ror(w[i - 15], 18) ^ (w[i - 15] >>  3)
            s1 = self.ror(w[i -  2], 17) ^ self.ror(w[i -  2], 19) ^ (w[i -  2] >> 10)
            w[i] = (w[i - 16] + s0 + w[i - 7] + s1) & self.M32

        a, b, c, d, e, f, g, h = self.h

        for i in range(64):
            s0 = self.ror(a, 2) ^ self.ror(a, 13) ^ self.ror(a, 22)
            t2 = s0 + self.maj(a, b, c)
            s1 = self.ror(e, 6) ^ self.ror(e, 11) ^ self.ror(e, 25)
            t1 = h + s1 + self.ch(e, f, g) + self.k[i] + w[i]

            h = g
            g = f
            f = e
            e = (d + t1) & self.M32
            d = c
            c = b
            b = a
            a = (t1 + t2) & self.M32

        for i, (x, y) in enumerate(zip(self.h, [a, b, c, d, e, f, g, h])):
            self.h[i] = (x + y) & self.M32

    def update(self, m):
        if m is None or len(m) == 0:
            return

        assert not self.fin, 'Hash already finalized and can not be updated!'

        self.mlen += len(m)
        m = self.buf + m

        for i in range(0, len(m) // 64):
            self.compress(m[64 * i : 64 * (i + 1)])

        self.buf = m[len(m) - (len(m) % 64):]

    def digest(self):
        if not self.fin:
            self.update(self.pad(self.mlen))
            self.digest = b''.join(x.to_bytes(4, 'big') for x in self.h[:8])
            self.fin = True
        return self.digest

    def hexdigest(self):
        tab = '0123456789abcdef'
        return ''.join(tab[b >> 4] + tab[b & 0xF] for b in self.digest())

def test():
    import secrets, hashlib, random
    for itest in range(500):
        data = secrets.token_bytes(random.randrange(257))
        a, b = hashlib.sha256(data).hexdigest(), Sha256(data).hexdigest()
        assert a == b, (a, b)
    for itest in range(500):
        a, b = hashlib.sha256(), Sha256()
        for j in range(random.randrange(10)):
            data = secrets.token_bytes(random.randrange(129))
            a.update(data)
            b.update(data)
        a, b = a.hexdigest(), b.hexdigest()
        assert a == b, (a, b)
    print('Sha256 tested successfully.')

if __name__ == '__main__':
    test()

1 Comment

works perfectly, thx
-1

If you only want the hash value:

from hashlib import sha256
data = input('Enter plaintext data: ')
output = sha256(data.encode('utf-8'))
print(output)

Python's hashlib also has SHA-1, SHA-384, SHA-512, and MD5 hash functions.

1 Comment

Is Python built-in Hashlib widely used in industry? If not which tool is used among Pycrytpodome, Cryptogrphy or Pynacl?
-1

Here is my proposition with redis:

for i in range(len(rserver.keys())):
    mdp_hash = rserver.get(rserver.keys()[i])
    rserver.set(rserver.keys()[i], hashlib.sha256(mdp_hash.encode()).hexdigest())

Comments

-2

Translating http://en.wikipedia.org/wiki/SHA-2#SHA-256_.28a_SHA-2_variant.29_pseudocode to Python should be straight forward.

1 Comment

This harder than it sounds because all the input / output conversion logic is not presented, and that is fairly non-trivial.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.