# 2.三叉搜索树

## 2.3 三叉搜索树构造

def outputBalancedDict(outputDict, sort_dict, offset, length):
    """Append a slice of ``sort_dict`` to ``outputDict`` in median-first order.

    The slice ``sort_dict[offset:offset + length]`` is emitted by taking its
    middle element first, then recursing into the left half, then into the
    right half.  Presumably the words are later inserted into the ternary
    search tree in this order so that the tree stays balanced -- confirm
    against the caller.

    Args:
        outputDict: list that receives the reordered entries (mutated in place).
        sort_dict: indexable sequence of entries, expected to be sorted already.
        offset: index of the first entry of the slice to emit.
        length: number of entries in the slice; values below 1 emit nothing.

    Returns:
        None.  The result is accumulated in ``outputDict``.
    """
    if length < 1:
        return
    # Floor division keeps ``mid`` an int on Python 3 as well as Python 2;
    # plain ``/`` would produce a float there and break the indexing below.
    mid = length // 2
    outputDict.append(sort_dict[mid + offset])
    outputBalancedDict(outputDict, sort_dict, offset, mid)
    outputBalancedDict(outputDict, sort_dict, offset + mid + 1, length - mid - 1)

def sortDict(rawDict):
    """Return a new list holding the entries of ``rawDict`` in collation order.

    Entries are compared with ``locale.strcoll``, so the resulting order
    follows the process's current ``LC_COLLATE`` setting.  This function does
    not call ``locale.setlocale`` itself; for Chinese ordering the caller is
    presumably expected to select a suitable locale beforehand.

    Args:
        rawDict: iterable of strings to sort.  It is not modified.

    Returns:
        A new sorted list.
    """
    import functools
    import locale

    # ``sorted(..., cmp=...)`` exists only on Python 2; wrapping the comparison
    # function with ``cmp_to_key`` works on both Python 2.7 and Python 3.
    return sorted(rawDict, key=functools.cmp_to_key(locale.strcoll))
``````

class TSNode(object):
    """A single node of the ternary search tree.

    Each node stores one character and three child links.  The tree code in
    this file follows ``loNode`` for characters that compare smaller than
    ``splitchar``, ``hiNode`` for characters that compare larger, and
    ``eqNode`` to continue with the next character of the key.
    """

    def __init__(self, splitchar):
        """Create a leaf node for ``splitchar`` with no children and no word."""
        # Left, middle and right child nodes.
        self.loNode = None
        self.eqNode = None
        self.hiNode = None
        # The character stored in this node.
        self.splitchar = splitchar
        # A terminal node stores the complete word here; "" means that no
        # word ends at this node.
        self.nodeValue = ""

    def toString(self):
        """Return the character stored in this node."""
        return self.splitchar

    def __repr__(self):
        """Return a debugging representation that shows the stored character."""
        return "TSNode(%r)" % (self.splitchar,)

class TernarySearchTrie(object):
    """Ternary search tree keyed by the characters of a word.

    ``root`` is the top node of the tree.  Nodes are expected to expose
    ``splitchar``, ``loNode``, ``eqNode`` and ``hiNode`` attributes.
    """

    def __init__(self, root):
        """Wrap an existing root node (``None`` for an empty tree)."""
        self.root = root

    def getNode(self, key, startNode):
        """Look up the word ``key`` starting from ``startNode``.

        Args:
            key: the string to look up.
            startNode: the node at which the search starts.

        Returns:
            The node matching the last character of ``key``, or ``None`` when
            ``key`` is empty/``None`` or the path for ``key`` does not exist.
            The node's ``nodeValue`` is not inspected here.
        """
        if not key:
            return None
        length = len(key)
        currentNode = startNode
        charIndex = 0
        while currentNode is not None:
            cmpChar = key[charIndex]
            # Direct comparisons replace the ``cmp()`` builtin, which no
            # longer exists on Python 3.
            if cmpChar == currentNode.splitchar:
                # The two characters are equal: advance to the next character.
                charIndex += 1
                if charIndex == length:
                    return currentNode
                currentNode = currentNode.eqNode
            elif cmpChar < currentNode.splitchar:
                currentNode = currentNode.loNode
            else:
                currentNode = currentNode.hiNode
        return None

    # NOTE(review): in the original text the statements below followed
    # getNode's loop directly and were unreachable; the method header appears
    # to have been lost.  The name ``addWord`` is a reconstruction -- confirm
    # against the upstream repository.
    def addWord(self, key):
        """Insert ``key`` into the tree, creating missing nodes on the way.

        Args:
            key: the string to insert.

        Returns:
            The node matching the last character of ``key`` (so that the
            caller can store the word in its ``nodeValue``), or ``None`` when
            ``key`` is empty or ``None``.
        """
        if not key:
            return None
        if self.root is None:
            # An empty tree gets its root from the first character of the key.
            self.root = TSNode(key[0])
        length = len(key)
        currentNode = self.root
        charIndex = 0
        while True:
            cmpChar = key[charIndex]
            if cmpChar == currentNode.splitchar:
                charIndex += 1
                if charIndex == length:
                    return currentNode
                if currentNode.eqNode is None:
                    currentNode.eqNode = TSNode(key[charIndex])
                currentNode = currentNode.eqNode
            elif cmpChar < currentNode.splitchar:
                if currentNode.loNode is None:
                    currentNode.loNode = TSNode(cmpChar)
                currentNode = currentNode.loNode
            else:
                if currentNode.hiNode is None:
                    currentNode.hiNode = TSNode(cmpChar)
                currentNode = currentNode.hiNode
``````

# 3.中文切词

class Segment(object):
    """Forward maximum-match word segmenter driven by a ternary search tree.

    ``root`` is used directly as the node at which every search starts; nodes
    are expected to expose ``splitchar``, ``loNode``, ``eqNode``, ``hiNode``
    and ``nodeValue`` (the stored word, or "" when no word ends at the node).
    """

    def __init__(self, root, text):
        """Prepare to segment ``text`` with the tree rooted at ``root``."""
        self.tst = root
        # Current segmentation position inside ``text``.
        self.offset = 0
        self.text = text

    def splitWordWithMaxMatch(self):
        """Return the longest dictionary word starting at the current offset.

        The tree is searched from ``self.offset``; every time a node carrying
        a word is passed, that word is remembered, so the longest match wins.
        When no dictionary word starts at the offset, the single character at
        the offset is returned instead.  ``self.offset`` is advanced past the
        returned word.

        Returns:
            The next word, or "" when the text is empty, the tree is ``None``
            or the whole text has already been consumed.
        """
        if not self.text or self.tst is None:
            return ""
        start = self.offset
        textLength = len(self.text)
        if start >= textLength:
            return ""

        word = ""
        wordEnd = start
        charIndex = start
        currentNode = self.tst
        # Stop when the tree path ends or the text is exhausted.
        while currentNode is not None and charIndex < textLength:
            currentChar = self.text[charIndex]
            # Direct comparisons replace the ``cmp()`` builtin, which no
            # longer exists on Python 3.
            if currentChar == currentNode.splitchar:
                charIndex += 1
                if currentNode.nodeValue != "":
                    # A word ends here; keep it as the best match so far.
                    word = currentNode.nodeValue
                    wordEnd = charIndex
                currentNode = currentNode.eqNode
            elif currentChar < currentNode.splitchar:
                currentNode = currentNode.loNode
            else:
                currentNode = currentNode.hiNode

        if word == "":
            # No dictionary word starts here (including the case where the
            # text ends in the middle of a longer word): emit the character
            # at the offset itself so that segmentation always makes progress.
            word = self.text[start]
            wordEnd = start + 1
        self.offset = wordEnd
        return word

# 4. 实验

```
                 大
              /  |  \
            中   学    活
            |    |   / | \
            心   生  心  动  生
                            |
                            活
```

# 5. github地址

https://github.com/wyz1989/Segment