[python] Remove control characters and all punctuation - dsindex/blog GitHub Wiki
#!/usr/bin/env python
#-*- coding: utf8 -*-
from unicodedata import category
# Strip every character whose Unicode general category is in the "Other"
# (C*) or "Punctuation" (P*) major class from `s`.
#
# `s` must already be bound by the surrounding code. It may be UTF-8 encoded
# bytes or already-decoded text; the result is always text.

# Decode only byte input. Calling .decode() on text fails outright on
# Python 3 (str has no .decode) and, on Python 2, forces an implicit ASCII
# encode that raises for any non-ASCII character.
if isinstance(s, (bytes, bytearray)):
    s = s.decode('utf-8')

# Single pass over the text. The first letter of the category code is the
# major class: 'C' covers Cc/Cf/Cs/Co/Cn (note that Cc includes '\n' and
# '\t'), 'P' covers Pc/Pd/Ps/Pe/Pi/Pf/Po.
s = ''.join(ch for ch in s if category(ch)[0] not in ('C', 'P'))