# 源代码注释 (annotated source code)
def numCode(s1_str):
    """Return the 1-based position of every non-'/' item in a segmented text.

    Every item of ``s1_str`` (each character when it is a string) consumes
    one position, counted from 1.  A '/' separator consumes its position too
    but is left out of the result, so a word boundary shows up as a gap in
    the returned numbers.

    Example:
        '我爱你/,/桂/书品/!' -> [1, 2, 3, 5, 7, 9, 10, 12]

    Args:
        s1_str: Segmented text that uses '/' as the separator between words.

    Returns:
        A list of ints, one per non-separator item, in input order.
    """
    return [pos for pos, item in enumerate(s1_str, start=1) if item != '/']
def _shift_outvoted(codes, i, winner):
    """Re-align the outvoted code list with the majority after position ``i``.

    When ``codes[i]`` is larger than the winning code, every later entry is
    decreased by one; otherwise every later entry is increased by one.
    ``codes[i]`` itself is left untouched.  The list is modified in place.
    """
    step = -1 if winner < codes[i] else 1
    for ii in range(i + 1, len(codes)):
        codes[ii] += step


def decisonThree(s, s1, s2, s3):
    """Fuse three word segmentations of the same text by majority vote.

    Each segmentation is converted to position codes with ``numCode``.  The
    codes are compared character by character: when at least two of the
    three agree, the agreed code is kept, and the later codes of the
    outvoted segmentation are shifted by one so that the comparison at the
    next position starts from the same footing.  The kept codes are then
    used to lay the characters of ``s`` out with '|' in the gaps, and the
    result is split on '|' to obtain the fused words.

    Example:
        s  = '我爱你,桂书品!'
        s1 = '我爱你/,/桂/书品/!'
        s2 = s3 = '我/爱/你/,/桂/书品/!'
        result -> ['我', '爱', '你', ',', '桂', '书品', '!']

    Intermediate values are printed to stdout as a trace.

    Args:
        s: The unsegmented characters, indexable by position (a string or a
            list of single-character strings).
        s1: First segmentation, as accepted by ``numCode``.
        s2: Second segmentation, as accepted by ``numCode``.
        s3: Third segmentation, as accepted by ``numCode``.

    Returns:
        The fused segmentation as a list of strings.  An empty list is
        returned when no position could be fused (for example, when the
        input is empty).

    Raises:
        IndexError: If ``s2`` or ``s3`` yields fewer codes than ``s1``, or
            if ``s`` has fewer items than there are fused positions.
    """
    a1 = numCode(s1)
    b1 = numCode(s2)
    c1 = numCode(s3)
    print(s)
    print(s1)
    print(a1)
    print(s2)
    print(b1)
    print(s3)
    print(c1)

    # Majority vote: whenever two of the three codes agree, that code wins.
    # A position where all three differ contributes nothing to d1.
    d1 = []
    for i in range(len(a1)):
        if a1[i] == b1[i] == c1[i]:
            d1.append(a1[i])
        elif a1[i] == b1[i]:
            d1.append(a1[i])
            _shift_outvoted(c1, i, a1[i])
        elif a1[i] == c1[i]:
            d1.append(a1[i])
            _shift_outvoted(b1, i, a1[i])
        elif c1[i] == b1[i]:
            d1.append(b1[i])
            _shift_outvoted(a1, i, b1[i])
    print('Decision Fusion:')
    print(d1)

    # max() raises ValueError on an empty list, so stop here when nothing
    # was fused.
    if not d1:
        return []

    # One slot per code up to the largest one; slots that no code claims
    # keep the '|' filler and become the word boundaries.
    listTemp = ['|'] * max(d1)
    print(listTemp)
    for sNum, nPos in enumerate(d1):
        listTemp[nPos - 1] = s[sNum]
    print(listTemp)

    # NOTE: a literal '|' inside ``s`` is indistinguishable from the filler
    # and is therefore treated as a word boundary by the split below.
    outputStr = ''.join(listTemp).split('|')
    print(outputStr)
    return outputStr