271. Encode and Decode Strings - cocoder39/coco39_LC GitHub Wiki
271. Encode and Decode Strings
Option 1: use non-ASCII code to separate strings
special care should be taken for []: [''] -> '' -> []
class Codec:
__sepatator = chr(257)
def encode(self, strs: List[str]) -> str:
"""Encodes a list of strings to a single string.
"""
if not strs:
return ''
return Codec.__sepatator.join(strs)
def decode(self, s: str) -> List[str]:
"""Decodes a single string to a list of strings.
"""
if not s:
return []
return s.split(Codec.__sepatator)
Option 2: encoding length along with string
When input string is not limited to ascii code, we cannot rely on separator. This is the generally encoding solution to process data stream
class Codec:
def encode(self, strs: List[str]) -> str:
"""Encodes a list of strings to a single string.
"""
output = []
for string in strs:
output.append(self.lenToStr(string) + string)
return ''.join(output)
def decode(self, s: str) -> List[str]:
"""Decodes a single string to a list of strings.
"""
i, n = 0, len(s)
output = []
while i < n:
length = self.strToLen(s[i:i+4])
i += 4
output.append(s[i:i+length])
i += length
return output
def lenToStr(self, string):
n = len(string)
lenBytes = []
for i in range(4):
byte = (n >> (i * 8)) & 0xff # 0xff = 15*16+15 = 255
lenBytes.append(chr(byte))
lenBytes.reverse()
return ''.join(lenBytes)
def strToLen(self, string):
res = 0
for ch in string:
res = res * 256 + ord(ch)
return res
Follow up:
type Data = Array<string | number | Array<string | number>>
function encode(to_send: Data) => string
function decode(received: string) => Data
Solution: encoding length and also type (ie number vs string vs array). When type is array, length is the encoded result of the internal array (opportunity of using recurision)
import numbers
class Codec:
def encode(self, arr) -> str:
output = []
for element in arr:
output.append(self.encodeElement(element))
return ''.join(output)
def decode(self, s: str) -> list:
i, n = 0, len(s)
output = []
while i < n:
length = self.decodeLength(s[i:i + 4])
i += 4
type = s[i]
i += 1
decoededElement = self.decodeElement(type, s[i:i+length])
i += length
output.append(decoededElement)
return output
def encodeElement(self, element):
encodeType = self.encodeType(element)
if encodeType != '0':
if encodeType == '1':
element = str(element)
return self.encodeLength(len(element)) + encodeType + element
else:
encodedArr = self.encode(element)
return self.encodeLength(len(encodedArr)) + encodeType + encodedArr
def decodeElement(self, type, string):
if type == '1':
return int(string)
if type == '2':
return string
if type == '0':
return self.decode(string)
raise Exception('unsupported type {}'.format(type))
def encodeType(self, e):
if isinstance(e, list):
return '0'
if isinstance(e, numbers.Number):
return '1'
if isinstance(e, str):
return '2'
raise Exception('unsupported instance type {}'.format(e))
def encodeLength(self, n):
lenBytes = []
for i in range(4):
byte = (n >> (i * 8)) & 0xff # 0xff = 15*16+15 = 255
lenBytes.append(chr(byte))
lenBytes.reverse()
return ''.join(lenBytes)
def decodeLength(self, string):
res = 0
for ch in string:
res = res * 256 + ord(ch)
return res
# Press the green button in the gutter to run the script.
if __name__ == '__main__':
tests = [
[],
['', 'b', ''],
['a', 'b', 'c'],
[['a', 'b'], 12, '13', ['5', 'abc', 7]]
]
codec = Codec()
for test in tests:
encodedTest = codec.encode(test)
decodedTest = codec.decode(encodedTest)
print(encodedTest, '_', decodedTest)
assert test == decodedTest
print('pass')
========================================================================
use auto pos = s.find_first_of("#", start)
to find the index of first "#"
after start
class Codec {
public:
// Encodes a list of strings to a single string.
string encode(vector<string>& strs) {
string res;
for (int i = 0; i < strs.size(); i++) {
int len = strs[i].length();
res += to_string(len) + '#' + strs[i];
}
return res;
}
// Decodes a single string to a list of strings.
vector<string> decode(string s) {
vector<string> res;
int start = 0;
while (start < s.length()) {
auto pos = s.find_first_of("#", start); //searches index after start
int len = stoi(s.substr(start, pos - start));
res.push_back(s.substr(pos + 1, len));
start = pos + len + 1;
}
return res;
}
};