#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
def NumberOfOccurencesOfWordInText(word, text):
    """Return the number of whole-word, case-insensitive matches of word in text.

    ``word`` may be a single word or a phrase; it is matched literally
    (regex metacharacters such as ``.`` or ``+`` have no special meaning).

    Boundary rules, applied only on a side of ``word`` that is an ASCII
    letter:
      * the neighbouring character in ``text`` must not be an ASCII letter;
      * it must not be a single apostrophe (so "connor" is not found in
        "O'connor" and "don" is not found in "don't"), but a doubled
        apostrophe (wiki-style ''bold'' markup) is accepted.

    Returns 0 for an empty ``word``.
    """
    # ``affiche_word`` is a debug switch set only by the script entry point;
    # look it up defensively so the function also works when imported.
    if globals().get('affiche_word') == 'oui':
        print('word =  %s' % word)

    if not word:
        # Nothing to search for (and word[-1] below would fail).
        return 0

    pattern = re.escape(word)
    if re.match('[a-z]', word, re.I) is not None:
        # Word starts with a letter: require a word boundary on the left.
        pattern = r"(?<![a-z])(?:(?<!')|(?<=''))" + pattern
    if re.match('[a-z]', word[-1], re.I) is not None:
        # Word ends with a letter: require a word boundary on the right.
        pattern += r"(?![a-z])(?:(?!')|(?=''))"

    # Count lazily instead of materialising every match in a list.
    return sum(1 for _ in re.finditer(pattern, text, re.IGNORECASE))
def testNumberOfOccurencesOfWordInText():
    """Check NumberOfOccurencesOfWordInText on short, very long and tricky texts.

    Raises AssertionError on the first expectation that is not met.
    """
    text = """Antoine is my name and I like python. Oh ! your name is antoine? And you like Python!
Yes is is true, I like PYTHON
and my name is ANTOINE"""
    # Test with a little text.
    assert 3 == NumberOfOccurencesOfWordInText("Antoine", text)
    assert 3 == NumberOfOccurencesOfWordInText("ANTOINE", text)
    assert 3 == NumberOfOccurencesOfWordInText("antoine", text)
    assert 0 == NumberOfOccurencesOfWordInText("antoin", text)
    assert 3 == NumberOfOccurencesOfWordInText("python", text)
    assert 3 == NumberOfOccurencesOfWordInText("PYTHON", text)
    assert 2 == NumberOfOccurencesOfWordInText("I", text)
    assert 0 == NumberOfOccurencesOfWordInText("n", text)
    assert 1 == NumberOfOccurencesOfWordInText("true", text)
    # Regard ' as text:
    assert 0 == NumberOfOccurencesOfWordInText("connor", "John O'connor is my friend")

    # Same checks on a BIG text (we once had a memory error with this...).
    text = """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy dog.""" * 500
    text += """The quick brown fox jump over the lazy dog.The quick brown Antoine jump over the lazy dog."""
    text += """esrf sqfdg sfdglkj sdflgh sdflgjdsqrgl """ * 4000
    text += """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy python."""
    text += """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy dog.""" * 500
    text += """The quick brown fox jump over the lazy dog.The quick brown Antoine jump over the lazy dog."""
    text += """esrf sqfdg sfdglkj sdflgh sdflgjdsqrgl """ * 4000
    text += """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy python."""
    text += """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy dog.""" * 500
    text += """The quick brown fox jump over the lazy dog.The quick brown Antoine jump over the lazy dog."""
    text += """esrf sqfdg sfdglkj sdflgh sdflgjdsqrgl """ * 4000
    text += """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy python."""
    text += """The quick brown fox jump over the true lazy dog.The quick brown fox jump over the lazy dog."""
    text += """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy dog.""" * 500
    text += """ I vsfgsdfg sfdg sdfg sdgh sgh I sfdgsdf"""
    text += """The quick brown fox jump over the lazy dog.The quick brown fox jump over the lazy dog.""" * 500
    assert 3 == NumberOfOccurencesOfWordInText("Antoine", text)
    assert 3 == NumberOfOccurencesOfWordInText("ANTOINE", text)
    assert 3 == NumberOfOccurencesOfWordInText("antoine", text)
    assert 0 == NumberOfOccurencesOfWordInText("antoin", text)
    assert 3 == NumberOfOccurencesOfWordInText("python", text)
    assert 3 == NumberOfOccurencesOfWordInText("PYTHON", text)
    assert 2 == NumberOfOccurencesOfWordInText("I", text)
    assert 0 == NumberOfOccurencesOfWordInText("n", text)
    assert 1 == NumberOfOccurencesOfWordInText("true", text)

    # Phrases next to quotes, brackets and other punctuation.
    assert 0 == NumberOfOccurencesOfWordInText(
        "retirement home",
        "I am a senior citizen and I live in the Fun-Plex 'Retirement Home' in Sopchoppy, Florida")
    assert 1 == NumberOfOccurencesOfWordInText(
        "'retirement home'",
        "I am a senior citizen and I live in the Fun-Plex 'Retirement Home' in Sopchoppy, Florida")
    assert 1 == NumberOfOccurencesOfWordInText(
        "retirement home",
        "I am a senior citizen and I live in the Fun-Plex (Retirement Home) in Sopchoppy, Florida")
    # The closing parenthesis of this assert was missing in the original.
    assert 1 == NumberOfOccurencesOfWordInText(
        "retirement home",
        "Retirement Home\" in Sopchoppy, Florida")
    assert 1 == NumberOfOccurencesOfWordInText(
        "retirement home",
        u"I am a senior citizen and I live in the Fun-Plex «Retirement Home» in Sopchoppy, Florida")
    assert 1 == NumberOfOccurencesOfWordInText(
        "retirement home",
        u"I am a senior citizen and I live in the Fun-Plex \u201cRetirement Home\u201d in Sopchoppy, Florida")
    assert 1 == NumberOfOccurencesOfWordInText(
        "legitimate",
        u"who is approved by OILS is completely legitimate: their employees are of legal working age")
    assert 0 == NumberOfOccurencesOfWordInText(
        "legitimate their",
        u"who is approved by OILS is completely legitimate: their employees are of legal working age")
    assert 1 == NumberOfOccurencesOfWordInText(
        "get back to me",
        u"I hope you will consider this proposal, and get back to me as soon as possible")
    assert 1 == NumberOfOccurencesOfWordInText(
        "skin-care",
        u"enable Delavigne and its subsidiaries to create a skin-care monopoly")
    assert 1 == NumberOfOccurencesOfWordInText(
        "skin-care monopoly",
        u"enable Delavigne and its subsidiaries to create a skin-care monopoly")
    assert 0 == NumberOfOccurencesOfWordInText(
        "skin-care monopoly in the US",
        u"enable Delavigne and its subsidiaries to create a skin-care monopoly")
    assert 1 == NumberOfOccurencesOfWordInText(
        "get back to me",
        u"When you know:get back to me")

    # Apostrophes inside and around the searched phrase.
    assert 1 == NumberOfOccurencesOfWordInText("don't be left", """emergency alarm warning.
Don't be left unprotected. Order your SSSS3000 today!""")
    assert 1 == NumberOfOccurencesOfWordInText("don", """emergency alarm warning.
Don't be left unprotected. Order your don SSSS3000 today!""")
    assert 1 == NumberOfOccurencesOfWordInText(
        "take that as a 'yes'",
        "Do I have to take that as a 'yes'?")
    assert 1 == NumberOfOccurencesOfWordInText(
        "don't take that as a 'yes'",
        "I don't take that as a 'yes'?")
    assert 1 == NumberOfOccurencesOfWordInText(
        "take that as a 'yes'",
        "I don't take that as a 'yes'?")
    assert 1 == NumberOfOccurencesOfWordInText(
        "don't",
        "I don't take that as a 'yes'?")
    assert 1 == NumberOfOccurencesOfWordInText(
        "attaching my c.v. to this e-mail",
        "I am attaching my c.v. to this e-mail.")

    # Wiki-style markup: runs of apostrophes or underscores around the text.
    assert 1 == NumberOfOccurencesOfWordInText("Local", "'''Local Perfumer Found Dead on Laboratory Floor'''")
    assert 1 == NumberOfOccurencesOfWordInText("Local Perfumer", "'''Local Perfumer Found Dead on Laboratory Floor'''")
    assert 1 == NumberOfOccurencesOfWordInText("Laboratory Floor", "'''Local Perfumer Found Dead on Laboratory Floor'''")
    assert 1 == NumberOfOccurencesOfWordInText("Floor", "'''Local Perfumer Found Dead on Laboratory Floor'''")
    assert 1 == NumberOfOccurencesOfWordInText("Floor", "''Local Perfumer Found Dead on Laboratory Floor''")
    assert 1 == NumberOfOccurencesOfWordInText("Floor", "__Local Perfumer Found Dead on Laboratory Floor__")
    assert 1 == NumberOfOccurencesOfWordInText("Floor", "'''''Local Perfumer Found Dead on Laboratory Floor'''''")
    assert 1 == NumberOfOccurencesOfWordInText("Local", "'''Local Perfumer Found Dead on Laboratory Floor'''")
    assert 1 == NumberOfOccurencesOfWordInText("Local", "''Local Perfumer Found Dead on Laboratory Floor''")
    assert 1 == NumberOfOccurencesOfWordInText("Local", "__Local Perfumer Found Dead on Laboratory Floor__")
    assert 1 == NumberOfOccurencesOfWordInText("Local", "'''''Local Perfumer Found Dead on Laboratory Floor'''''")
# Sample letter searched repeatedly by doit() for benchmarking.
# NOTE(review): a few lines look hard-wrapped in the middle of a word
# ("su"/"gar", "suga"/"r", "ne"/"arly") -- possibly a copy/paste artifact.
# The text is runtime data, so it is left exactly as written; confirm against
# the original source before changing it.
SampleTextForBench = """
A Suggestion Box Entry from Bob Carter
Dear Anonymous,
I'm not quite sure I understand the concept of this 'Anonymous' Suggestion Box. If no one reads what we write, then how will anything ever
change?
But in the spirit of good will, I've decided to offer my two cents, and hopefully Kevin won't steal it! (ha, ha). I would really like to
see more varieties of coffee in the coffee machine in the break room. 'Milk and sugar', 'black with sugar', 'extra sugar' and 'cream and su
gar' don't offer much diversity. Also, the selection of drinks seems heavily weighted in favor of 'sugar'. What if we don't want any suga
r?
But all this is beside the point because I quite like sugar, to be honest. In fact, that's my second suggestion: more sugar in the office.
Cakes, candy, insulin, aspartame... I'm not picky. I'll take it by mouth or inject it intravenously, if I have to.
Also, if someone could please fix the lock on the men's room stall, that would be helpful. Yesterday I was doing my business when Icarus ne
arly climbed into my lap.
So, have a great day!
Anonymously,
Bob Carter
"""
def doit():
    """Benchmark NumberOfOccurencesOfWordInText on SampleTextForBench.

    Runs 400 iterations; each one counts a fixed list of words in the sample
    text and adds the counts to a running total.  A progress line is printed
    every 50 iterations and the final total is printed at the end.
    """
    # Searched in this exact order on every iteration.
    words = (
        "word", "sugar", "help", "heavily", "witfull", "dog", "almost",
        "insulin", "attaching", "asma", "neither", "won't", "green",
        "parabole",
    )
    i = 0
    print("Itération de x de 0 à 399 :")
    for x in range(400):
        if x % 50 == 0:
            # Single-argument print keeps the output identical on Python 2 and 3.
            print('x = %s  i = %s' % (x, i))
        for word in words:
            i += NumberOfOccurencesOfWordInText(word, SampleTextForBench)
    print(i)
# Script entry point: run the correctness tests, then profile the benchmark.
if __name__ == '__main__':
    # Correctness first; 'oui' makes the counting function echo each word.
    affiche_word = 'oui'
    try:
        testNumberOfOccurencesOfWordInText()
    except Exception:
        # Announce the failure, then let the traceback show which assert broke.
        print("Error !")
        raise
    print("\n\nTests passed\n\n")
    # Then speed: silence the per-call echo and profile the benchmark.
    import profile
    affiche_word = 'non'
    profile.run('doit()')
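# (Leftover "Partager" / share link from the web page this script was copied from; not part of the program.)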