[python] remove control characters and all punctuation - dsindex/blog GitHub Wiki


#!/usr/bin/env python
#-*- coding: utf8 -*-

from   unicodedata import category 

# Normalise ``s`` to text before inspecting characters.  Only byte strings
# are decoded: calling .decode() on text that is already unicode fails
# (AttributeError on Python 3; an implicit ASCII encode on Python 2 that
# raises UnicodeEncodeError for non-ASCII input).
if isinstance(s, bytes):
    s = s.decode('utf-8')

# Drop, in a single pass, every character whose Unicode general category
# starts with:
#   'C' -- the "Other" group (Cc control, Cf format, Cs surrogate,
#          Co private use, Cn unassigned); this includes tab and newline.
#   'P' -- every punctuation class (Pc, Pd, Ps, Pe, Pi, Pf, Po).
# Symbols (category 'S', e.g. '$' or '+') are not punctuation and are kept.
s = ''.join(ch for ch in s if category(ch)[0] not in ('C', 'P'))