流密码与汉明距离
学习出处:https://www.anquanke.com/post/id/161171#h3-6
题目 1 2 3 4 5 6 7 8 9 10 11 12 from itertools import *from data import flag,plainkey=flag.strip("de1ctf{" ).strip("}" ) assert (len (key)<38 )salt="WeAreDe1taTeam" ki=cycle(key) si=cycle(salt) cipher = '' .join([hex (ord (p) ^ ord (next (ki)) ^ ord (next (si)))[2 :].zfill(2 ) for p in plain]) print cipheroutput: 49380d773440222d1b421b3060380c3f403c3844791b202651306721135b6229294a3c3222357e766b2f15561b35305e3c3b670e49382c295c6c170553577d3a2b791470406318315d753f03637f2b614a4f2e1c4f21027e227a4122757b446037786a7b0e37635024246d60136f7802543e4d36265c3e035a725c6322700d626b345d1d6464283a016f35714d434124281b607d315f66212d671428026a4f4f79657e34153f3467097e4e135f187a21767f02125b375563517a3742597b6c394e78742c4a725069606576777c314429264f6e330d7530453f22537f5e3034560d22146831456b1b72725f30676d0d5c71617d48753e26667e2f7a334c731c22630a242c7140457a42324629064441036c7e646208630e745531436b7c51743a36674c4f352a5575407b767a5c747176016c0676386e403a2b42356a727a04662b4446375f36265f3f124b724c6e346544706277641025063420016629225b43432428036f29341a2338627c47650b264c477c653a67043e6766152a485c7f33617264780656537e5468143f305f4537722352303c3d4379043d69797e6f3922527b24536e310d653d4c33696c635474637d0326516f745e610d773340306621105a7361654e3e392970687c2e335f3015677d4b3a724a4659767c2f5b7c16055a126820306c14315d6b59224a27311f747f336f4d5974321a22507b22705a226c6d446a37375761423a2b5c29247163046d7e47032244377508300751727126326f117f7a38670c2b23203d4f27046a5c5e1532601126292f577776606f0c6d0126474b2a73737a41316362146e581d7c1228717664091c
分析 先去掉盐值,使mid为密钥流与明文异或结果
通过计算汉明距离求出密钥长度
再进行字频分析推测出密钥
知识点 汉明距离
汉明距离其实是在二进制层面观测两个等长字符串的比特位差异。
取等长二进制进行异或,最后对应为1的比特位个数即为汉明距离。
1 2 3 def hamming_distance (a, b ): xor_ = bin (int (a, 16 ) ^ int (b, 16 ))[2 :] return xor_.count('1' )
汉明距离与猜测密钥长度
大小写英文字符两两的平均Hamming距离为2 ~ 3,而任意字符两两的平均Hamming距离为4。所以猜测到正确的密钥时,两段密文异或即对应的明文异或,这时两段的汉明距离应该趋于最小。取多段密文计算汉明距离再求平均,即可以推测出密钥长度。
推断密钥 求出密钥长度为30后,进行字频分析
字频表如下
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 freq = {} freq[' ' ] = 700000000 freq['e' ] = 390395169 freq['t' ] = 282039486 freq['a' ] = 248362256 freq['o' ] = 235661502 freq['i' ] = 214822972 freq['n' ] = 214319386 freq['s' ] = 196844692 freq['h' ] = 193607737 freq['r' ] = 184990759 freq['d' ] = 134044565 freq['l' ] = 125951672 freq['u' ] = 88219598 freq['c' ] = 79962026 freq['m' ] = 79502870 freq['f' ] = 72967175 freq['w' ] = 69069021 freq['g' ] = 61549736 freq['y' ] = 59010696 freq['p' ] = 55746578 freq['b' ] = 47673928 freq['v' ] = 30476191 freq['k' ] = 22969448 freq['x' ] = 5574077 freq['j' ] = 4507165 freq['q' ] = 3649838 freq['z' ] = 2456495
将每位密钥对应的所有密文分为一组
将一组密文与0~256异或解出对应的明文
计算这组明文的频率分数
分数最高的字符即是对应的密钥位
解题 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 from itertools import *salt = "WeAreDe1taTeam" si = cycle(salt) cipher = '49380d773440222d1b421b3060380c3f403c3844791b202651306721135b6229294a3c3222357e766b2f15561b35305e3c3b670e49382c295c6c170553577d3a2b791470406318315d753f03637f2b614a4f2e1c4f21027e227a4122757b446037786a7b0e37635024246d60136f7802543e4d36265c3e035a725c6322700d626b345d1d6464283a016f35714d434124281b607d315f66212d671428026a4f4f79657e34153f3467097e4e135f187a21767f02125b375563517a3742597b6c394e78742c4a725069606576777c314429264f6e330d7530453f22537f5e3034560d22146831456b1b72725f30676d0d5c71617d48753e26667e2f7a334c731c22630a242c7140457a42324629064441036c7e646208630e745531436b7c51743a36674c4f352a5575407b767a5c747176016c0676386e403a2b42356a727a04662b4446375f36265f3f124b724c6e346544706277641025063420016629225b43432428036f29341a2338627c47650b264c477c653a67043e6766152a485c7f33617264780656537e5468143f305f4537722352303c3d4379043d69797e6f3922527b24536e310d653d4c33696c635474637d0326516f745e610d773340306621105a7361654e3e392970687c2e335f3015677d4b3a724a4659767c2f5b7c16055a126820306c14315d6b59224a27311f747f336f4d5974321a22507b22705a226c6d446a37375761423a2b5c29247163046d7e47032244377508300751727126326f117f7a38670c2b23203d4f27046a5c5e1532601126292f577776606f0c6d0126474b2a73737a41316362146e581d7c1228717664091c' mid = '' for i in range (0 , len (cipher), 2 ): mid += hex (int (cipher[i:i + 2 ], 16 ) ^ ord (next (si)))[2 :].zfill(2 ) def hamming_distance (a, b ): xor_ = bin (int (a, 16 ) ^ int (b, 16 ))[2 :] return xor_.count('1' ) def key_len (): ave = [] for keysize in range (2 , 38 ): b = [ mid[:keysize * 2 ], mid[keysize * 2 :keysize * 4 ], mid[keysize * 4 :keysize * 6 ], mid[keysize * 6 :keysize * 8 ], mid[keysize * 8 :keysize * 10 ], mid[keysize * 10 :keysize * 12 ], ] print (b) distance = [ hamming_distance(b[x], b[x + 1 ]) for x in range (len (b) - 1 ) ] ave.append((sum (distance) / (keysize * 5 ), keysize)) return sorted (ave)[0 ][1 ] def score (s ): freq = {} freq[' ' ] = 700000000 freq['e' ] = 390395169 freq['t' ] = 282039486 freq['a' ] = 248362256 freq['o' ] = 235661502 freq['i' ] = 214822972 freq['n' ] = 214319386 freq['s' ] = 196844692 freq['h' ] = 193607737 freq['r' ] = 184990759 freq['d' ] = 134044565 freq['l' ] = 125951672 freq['u' ] = 88219598 freq['c' ] = 79962026 freq['m' ] = 79502870 freq['f' ] = 72967175 freq['w' ] = 69069021 freq['g' ] = 61549736 freq['y' ] = 59010696 freq['p' ] = 55746578 freq['b' ] = 47673928 freq['v' ] = 30476191 freq['k' ] = 22969448 freq['x' ] = 5574077 freq['j' ] = 4507165 freq['q' ] = 3649838 freq['z' ] = 2456495 score = 0 for i in s.lower(): if i in freq: score += freq[i] return score def guess_key (ci ): ci = bytes .fromhex(ci) Score = [] for i in range (256 ): xor = [] for x in ci: xor.append(chr (x ^ i)) s = '' .join(xor) Score.append(score(s)) print (s) return chr (Score.index(max (Score))) keysize = key_len() c = ['' ] * keysize key = [] byte = bytes .fromhex(mid) for i in range (len (byte)): c[i % keysize] += hex (byte[i])[2 :].zfill(2 ) for i in c: key.append(guess_key(i)) print ('' .join(key)