271. Encode and Decode Strings - cocoder39/coco39_LC GitHub Wiki

271. Encode and Decode Strings

Option 1: use non-ASCII code to separate strings

special care should be taken for []: [''] -> '' -> []

class Codec:
    __sepatator = chr(257)

    def encode(self, strs: List[str]) -> str:
        """Encodes a list of strings to a single string.
        """
        if not strs:
            return ''

        return Codec.__sepatator.join(strs)
        

    def decode(self, s: str) -> List[str]:
        """Decodes a single string to a list of strings.
        """
        if not s:
            return []

        return s.split(Codec.__sepatator)

Option 2: encoding length along with string

When input string is not limited to ascii code, we cannot rely on separator. This is the generally encoding solution to process data stream

class Codec:
    def encode(self, strs: List[str]) -> str:
        """Encodes a list of strings to a single string.
        """
        output = []
        for string in strs:
            output.append(self.lenToStr(string) + string)
        return ''.join(output)

    def decode(self, s: str) -> List[str]:
        """Decodes a single string to a list of strings.
        """
        i, n = 0, len(s)
        output = []
        while i < n:
            length = self.strToLen(s[i:i+4])
            i += 4
            output.append(s[i:i+length])
            i += length
        return output

    def lenToStr(self, string):
        n = len(string)
        lenBytes = []
        for i in range(4):
            byte = (n >> (i * 8)) & 0xff # 0xff = 15*16+15 = 255
            lenBytes.append(chr(byte))
        lenBytes.reverse()
        return ''.join(lenBytes)

    def strToLen(self, string):
        res = 0
        for ch in string:
            res = res * 256 + ord(ch)
        return res

Follow up:

type Data = Array<string | number | Array<string | number>>
function encode(to_send: Data) => string
function decode(received: string) => Data

Solution: encoding length and also type (ie number vs string vs array). When type is array, length is the encoded result of the internal array (opportunity of using recurision)
import numbers

class Codec:
    def encode(self, arr) -> str:
        output = []
        for element in arr:
            output.append(self.encodeElement(element))
        return ''.join(output)

    def decode(self, s: str) -> list:
        i, n = 0, len(s)
        output = []
        while i < n:
            length = self.decodeLength(s[i:i + 4])
            i += 4
            type = s[i]
            i += 1
            decoededElement = self.decodeElement(type, s[i:i+length])
            i += length
            output.append(decoededElement)
        return output

    def encodeElement(self, element):
        encodeType = self.encodeType(element)
        if encodeType != '0':
            if encodeType == '1':
                element = str(element)
            return self.encodeLength(len(element)) + encodeType + element
        else:
            encodedArr = self.encode(element)
            return self.encodeLength(len(encodedArr)) + encodeType + encodedArr

    def decodeElement(self, type, string):
        if type == '1':
            return int(string)
        if type == '2':
            return string
        if type == '0':
            return self.decode(string)
        raise Exception('unsupported type {}'.format(type))

    def encodeType(self, e):
        if isinstance(e, list):
            return '0'
        if isinstance(e, numbers.Number):
            return '1'
        if isinstance(e, str):
            return '2'
        raise Exception('unsupported instance type {}'.format(e))

    def encodeLength(self, n):
        lenBytes = []
        for i in range(4):
            byte = (n >> (i * 8)) & 0xff # 0xff = 15*16+15 = 255
            lenBytes.append(chr(byte))
        lenBytes.reverse()
        return ''.join(lenBytes)

    def decodeLength(self, string):
        res = 0
        for ch in string:
            res = res * 256 + ord(ch)
        return res

# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    tests = [
        [],
        ['', 'b', ''],
        ['a', 'b', 'c'],
        [['a', 'b'], 12, '13', ['5', 'abc', 7]]
    ]

    codec = Codec()
    for test in tests:
        encodedTest = codec.encode(test)
        decodedTest = codec.decode(encodedTest)
        print(encodedTest, '_', decodedTest)
        assert test == decodedTest
    print('pass')

========================================================================

use auto pos = s.find_first_of("#", start) to find the index of first "#" after start

class Codec {
public:
    // Encodes a list of strings to a single string.
    string encode(vector<string>& strs) {
        string res;
        for (int i = 0; i < strs.size(); i++) {
            int len = strs[i].length();
            res += to_string(len) + '#' + strs[i];
        }
        return res;
    }

    // Decodes a single string to a list of strings.
    vector<string> decode(string s) {
        vector<string> res;
        int start = 0;
        while (start < s.length()) {
            auto pos = s.find_first_of("#", start); //searches index after start
            int len = stoi(s.substr(start, pos - start));
            res.push_back(s.substr(pos + 1, len));
            start = pos + len + 1;
        }
        return res;
    }
};
⚠️ **GitHub.com Fallback** ⚠️