1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
| >>> import requests, re
>>> from HTMLParser import HTMLParser
>>> url = 'https://www.developpez.net/forums/d1776091/autres-langages/python-zope/general-python/supprimer-emojis-d-texte/'
>>> parser = HTMLParser()
>>> reg1 = re.compile(u'----<br />\r\n(.+)\r\n-----', re.UNICODE | re.DOTALL)
>>> reg2 = re.compile(u'['
... u'\U0001F300-\U0001F64F'
... u'\U0001F680-\U0001F6FF'
... u'\u2600-\u26FF\u2700-\u27BF]+',
... re.UNICODE | re.DOTALL)
>>> page = requests.get(url).text
>>> temp = re.search(reg1, page).group(1).replace('<br />', '').replace('\r', '')
>>> temp
u'track 💯🙌🏼🔥get invit ritaora rudiment live novasredroom\n😍want ritaora rudiment live novasredroom invit\n✈️want fli melbourn guy live invit novasredroom unlock\n🔥want guy live free invit novasredroom unlock\nbumpi speedi much charact mark_skaif explain pukekoh insid set-up vasc🎥\n🚨new perk alert🚨 student score select fee\n🚨just announced🚨you rudiment novasredroom unlock 🎉keep nova'
>>> texte = parser.unescape(temp)
>>> texte
u'track \U0001f4af\U0001f64c\U0001f3fc\U0001f525get invit ritaora rudiment live novasredroom\n\U0001f60dwant ritaora rudiment live novasredroom invit\n\u2708\ufe0fwant fli melbourn guy live invit novasredroom unlock\n\U0001f525want guy live free invit novasredroom unlock\nbumpi speedi much charact mark_skaif explain pukekoh insid set-up vasc\U0001f3a5\n\U0001f6a8new perk alert\U0001f6a8 student score select fee\n\U0001f6a8just announced\U0001f6a8you rudiment novasredroom unlock \U0001f389keep nova'
>>> re.sub(reg2, u'', texte)
u'track get invit ritaora rudiment live novasredroom\nwant ritaora rudiment live novasredroom invit\n\ufe0fwant fli melbourn guy live invit novasredroom unlock\nwant guy live free invit novasredroom unlock\nbumpi speedi much charact mark_skaif explain pukekoh insid set-up vasc\nnew perk alert student score select fee\njust announcedyou rudiment novasredroom unlock keep nova' |
Partager