-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCipherUtils.py
236 lines (188 loc) · 7.07 KB
/
CipherUtils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
import math
import os
import random

import numpy as np
class CipherGenerator:
    """Produces random substitution ciphers over the lowercase alphabet."""

    def generate_cipher(self):
        """
        Build a random cipher by shuffling the letters 'a' through 'z'.

        The shuffle uses the module-level ``random`` generator, so seeding
        ``random`` makes the result reproducible.

        Returns:
            list: The 26 lowercase letters, each exactly once, in random order.
        """
        letters = [chr(code) for code in range(ord("a"), ord("z") + 1)]
        random.shuffle(letters)
        return letters

    # Disabled code, kept for reference only (it was previously parked inside
    # a bare string literal and never executed):
    #
    # def generate_double_cipher(self):
    #     Generates a double cipher, which is a combination of two randomly
    #     shuffled ciphers.
    #     Returns:
    #         list: A list representing the generated double cipher.
    #     The double cipher is created by generating two random ciphers and
    #     combining them. Each cipher is a shuffled version of the alphabet.
    #     The double cipher is formed by randomly selecting elements from
    #     either of the two ciphers based on a binary choice.
    #
    #     cipher1 = self.generate_cipher()
    #     cipher2 = self.generate_cipher()
    #     output = [0] * len(cipher1)
    #     choice = np.random.binomial(1, 0.5, len(cipher1))
    #     for i in range(len(cipher1)):
    #         if choice[i] == 0:
    #             output[i] = cipher1[i]
    #         else:
    #             output[i] = cipher2[i]
    #     return output
class TextEncoder:
    """
    Encodes text with one or two 26-letter substitution ciphers.

    A cipher is a sequence indexed by letter position: element 0 is the
    substitute for 'a' and element 25 the substitute for 'z'. Only characters
    whose lowercase form is one of 'a'..'z' are substituted; every other
    character (digits, punctuation, whitespace, accented or non-Latin letters)
    is copied to the output unchanged.
    """

    @staticmethod
    def _substitute(char, cipher):
        """Return the cipher substitute for ``char``, or ``char`` itself when it is not an a-z letter."""
        lowered = char.lower()
        # str.isalpha() is also true for letters such as 'é', whose index would
        # fall outside the 26-entry cipher, and some characters lowercase to
        # more than one code point (which ord() rejects). Restrict the
        # substitution to a-z explicitly and pass everything else through.
        if len(lowered) != 1 or not "a" <= lowered <= "z":
            return char
        encoded_char = cipher[ord(lowered) - ord("a")]
        # Preserve the case of the input character.
        return encoded_char.upper() if char.isupper() else encoded_char

    def encode_text(self, text, cipher):
        """
        Encodes the given text using a cipher.

        Args:
            text (str): The input text to encode.
            cipher (list): The cipher to use for encoding; element i replaces
                the i-th letter of the alphabet.

        Returns:
            str: The encoded text, with letter case preserved and characters
            outside a-z / A-Z left unchanged.
        """
        return "".join(TextEncoder._substitute(char, cipher) for char in text)

    def encode_text_double_cipher(self, text, cipher1, cipher2):
        """
        Encodes the given text using two ciphers.

        For every position in the text a fair coin is drawn with
        ``np.random.binomial``; the character at that position is encoded with
        ``cipher1`` when the draw is 0 and with ``cipher2`` otherwise. One draw
        is made per character of ``text``, including characters that are not
        substituted.

        Args:
            text (str): The input text to encode.
            cipher1 (list): The cipher used when the draw is 0.
            cipher2 (list): The cipher used when the draw is 1.

        Returns:
            str: The encoded text, with letter case preserved and characters
            outside a-z / A-Z left unchanged.

        WARNING: class TextDecoder is not defined for text encoded by a
        double cipher.
        """
        choice = np.random.binomial(1, 0.5, len(text))
        return "".join(
            TextEncoder._substitute(char, cipher1 if pick == 0 else cipher2)
            for char, pick in zip(text, choice)
        )
class TextDecoder:
    """Reverses a single substitution cipher applied to a-z / A-Z letters."""

    def decode_text(self, text, cipher):
        """
        Decodes the given text using a cipher.

        Args:
            text (str): The input text to decode.
            cipher (list): The cipher that was used for encoding; element i is
                the substitute of the i-th letter of the alphabet.

        Returns:
            str: The decoded text. Letter case is preserved. Non-alphabetic
            characters, and letters outside a-z that do not occur in the
            cipher (for example accented letters), are copied unchanged.

        Raises:
            ValueError: If the text contains an a-z / A-Z letter that does not
                occur in ``cipher`` (i.e. the cipher is incomplete).
        """
        # Invert the cipher once instead of scanning it for every character.
        # setdefault keeps the first position of a repeated symbol, which is
        # what list.index() would report.
        inverse = {}
        for position, symbol in enumerate(cipher):
            inverse.setdefault(symbol, chr(ord("a") + position))

        decoded_text = []
        for char in text:
            if not char.isalpha():
                decoded_text.append(char)
                continue
            lowered = char.lower()
            decoded_char = inverse.get(lowered)
            if decoded_char is None:
                if len(lowered) == 1 and "a" <= lowered <= "z":
                    # A plain letter missing from the cipher means the cipher
                    # itself is malformed; keep signalling that loudly.
                    raise ValueError(f"{lowered!r} is not in cipher")
                # Letters outside a-z are never substituted, so they decode
                # to themselves.
                decoded_text.append(char)
                continue
            if char.isupper():
                decoded_char = decoded_char.upper()
            decoded_text.append(decoded_char)
        return "".join(decoded_text)
"""
class TextLik:
def get_log_likelihood(self, text, probability_table):
""
Calculates the log-likelihood of the given text based on a probability table.
Args:
text (str): The input text.
probability_table (dict): A probability table mapping two-character sequences to their probabilities.
Returns:
float: The log-likelihood of the text.
""
two_char_list = [text[i : i + 2] for i in range(len(text) - 1)]
probabilities = [
probability_table.get(two_char, 1 / len(probability_table))
for two_char in two_char_list
]
log_likelihood = sum(math.log(prob) for prob in probabilities)
return log_likelihood
"""
class TextPreProcessor:
    """
    Cleans raw text down to a fixed alphabet.

    The helpers are independent steps: lowercase the text, list the characters
    that are not in the alphabet, replace those characters with spaces,
    collapse repeated whitespace, and write the result to disk.
    """

    def __init__(self, alphabet=None):
        """
        Initializes the TextPreProcessor object with an alphabet.

        Args:
            alphabet (list, optional): The list of valid characters. Defaults
                to the lowercase alphabet 'a'..'z'.
        """
        # Build the default per instance. A list used directly as the default
        # value is created once and shared by every instance, so mutating one
        # instance's alphabet would silently change all the others.
        if alphabet is None:
            alphabet = list("abcdefghijklmnopqrstuvwxyz")
        self.alphabet = alphabet

    def has_uppercase(self, text):
        """
        Checks if the text contains uppercase characters.

        Args:
            text (str): The input text.

        Returns:
            bool: True if the text contains uppercase characters, False otherwise.
        """
        return any(char.isupper() for char in text)

    def lower(self, text):
        """
        Converts the text to lowercase.

        Args:
            text (str): The input text.

        Returns:
            str: The text converted to lowercase.
        """
        return text.lower()

    def unknown_chars(self, text):
        """
        Finds the characters in the text that are not present in the alphabet.

        Args:
            text (str): The input text. It must not contain uppercase
                characters; lowercase it first (see ``lower``).

        Returns:
            list: The distinct unknown characters, in order of first
            appearance. Note that a space counts as unknown unless it is part
            of the alphabet.

        Raises:
            ImportWarning: If the text contains an uppercase character.
        """
        if self.has_uppercase(text):
            # NOTE(review): ImportWarning is an unusual type for an input
            # validation error; it and its message are kept as-is so existing
            # callers that catch or match it keep working.
            raise ImportWarning(
                f"text={text} has upper case. Preprocess it using TextPreProcess class."
            )
        # Set membership is O(1); dict.fromkeys de-duplicates while keeping
        # first-seen order.
        allowed = set(self.alphabet)
        return list(dict.fromkeys(char for char in text if char not in allowed))

    def remove_unknown_chars(self, text, unknown_chars):
        """
        Replaces every unknown character in the text with a single space.

        Args:
            text (str): The input text.
            unknown_chars (list): A list of unknown characters to be replaced.

        Returns:
            str: The text with each occurrence of an unknown character
            replaced by a space (the text length is unchanged for
            single-character entries).
        """
        for char in unknown_chars:
            text = text.replace(char, " ")
        return text

    def remove_additional_spaces(self, text):
        """
        Collapses every run of whitespace into a single space.

        Leading and trailing whitespace is removed, and tabs and newlines are
        treated as whitespace too, because the text is split on any
        whitespace and re-joined with single spaces.

        Args:
            text (str): The text to normalize.

        Returns:
            str: The text with words separated by exactly one space.
        """
        text = " ".join(text.split())
        return text

    def save_text(self, text):
        """
        Saves the preprocessed text to 'outputs/text_preprocessed.txt'.

        The path is relative to the current working directory. The 'outputs'
        directory is created if it does not exist, an existing file is
        overwritten, and a trailing newline is written after the text.

        Args:
            text (str): The preprocessed text.
        """
        # Without this the open() below fails when 'outputs/' is missing.
        os.makedirs("outputs", exist_ok=True)
        # Explicit encoding so the file content does not depend on the
        # platform's default locale.
        with open("outputs/text_preprocessed.txt", "w", encoding="utf-8") as file:
            print(text, file=file)