Extracts minimum repeating string from string variable - lvphj/epydemiology GitHub Wiki
Example 1
df = pd.DataFrame({'OriginalStr':['Value:abcdabcd','defdefxyz','hij hij ']})
print(df)
OriginalStr
0 Value:abcdabcd
1 defdefxyz
2 hij hij
df = epy.phjAddColumnOfMinRepeatingString(phjDF = df,
phjColName = 'OriginalStr',
phjNewColName = 'RepeatingStr',
phjPrefixStr = 'Value:',
phjSuffixStr = 'xyz',
phjStripWhiteSpc = True,
phjPrintResults = True)
Returned dataframe
==================
OriginalStr RepeatingStr
0 Value:abcdabcd abcd
1 defdefxyz def
2 hij hij hij
Example 2
myExampleDF = pd.DataFrame({'somevariable':['Producer: 12/345/6789 Other Mr A N',
'Mixed Batch Mixed Batch ',
' Another mixed batch Another mixed batch',
'FBO 98/765/4321 Happy Fmrs (Smith) AB1234',
'FBO KARRO - MALTON UNREAD',
'Market: 56/789/1234 Livestock Sellers 56/789/1234 Livestock Sellers',
'Market: Repeated string Repeated string Repeated string Repeated stringendbit']})
print(myExampleDF)
somevariable
0 Producer: 12/345/6789 Other Mr A N
1 Mixed Batch Mixed Batch
2 Another mixed batch Another mixed batch
3 FBO 98/765/4321 Happy Fmrs (Smith) AB1234
4 FBO KARRO - MALTON UNREAD
5 Market: 56/789/1234 Livestock Sellers 56/789/1...
6 Market: Repeated string Repeated string Repeat...
phjExampleDF = epy.phjAddColumnOfMinRepeatingString(phjDF = myExampleDF,
phjColName = 'somevariable',
phjNewColName = 'somevariable_norpt',
phjPrefixStr = 'Market:',
phjSuffixStr = 'endbit',
phjReattachAffixes = True,
phjReduceMultiSpc = True,
phjStripWhiteSpc = True,
phjPrintResults = False)
print(phjExampleDF)
somevariable \
0 Producer: 12/345/6789 Other Mr A N
1 Mixed Batch Mixed Batch
2 Another mixed batch Another mixed batch
3 FBO 98/765/4321 Happy Fmrs (Smith) AB1234
4 FBO KARRO - MALTON UNREAD
5 Market: 56/789/1234 Livestock Sellers 56/789/1...
6 Market: Repeated string Repeated string Repeat...
somevariable_norpt
0 Producer: 12/345/6789 Other Mr A N
1 Mixed Batch
2 Another mixed batch
3 FBO 98/765/4321 Happy Fmrs (Smith) AB1234
4 FBO KARRO - MALTON UNREAD
5 Market: 56/789/1234 Livestock Sellers
6 Market: Repeated stringendbit