44
55from twitter import TwitterError # import not used?
66
class Emoticons:
    """Tables of text emoticons, grouped by the sentiment they express.

    POSITIVE and NEGATIVE are plain lists of emoticon strings intended for
    whole-token membership tests.
    """

    # NOTE(review): ";=)" appears twice and "* *" contains a space, so a
    # whitespace-delimited token can never equal it. Both are kept verbatim
    # so the list contents stay exactly as they were.
    POSITIVE = [
        "*O", "*-*", "*O*", "*o*", "* *", ":P", ":D", ":d",
        ":p", ";P", ";D", ";d", ";p", ":-)", ";-)", ":=)",
        ";=)", ":<)", ":>)", ";>)", ";=)", "=}", ":)", "(:;)",
        "(;", ":}", "{:", ";}", "{;:]", "[;", ":')", ";')",
        ":-3", "{;", ":]", ";-3", ":-x", ";-x", ":-X", ";-X",
        ":-}", ";-=}", ":-]", ";-]", ":-.)", "^_^", "^-^",
    ]

    NEGATIVE = [
        ":(", ";(", ":'(", "=(", "={", "):", ");",
        ")':", ")';", ")=", "}=", ";-{{", ";-{", ":-{{",
        ":-{", ":-(", ";-(", ":,)", ":'{", "[:", ";]",
    ]
731
832class ParseTweet :
933 # compile once on import
1034 regexp = {"RT" : "^RT" , "MT" : r"^MT" , "ALNUM" : r"(@[a-zA-Z0-9_]+)" ,
11- "HASHTAG" : r"(#[\w\d]+)" , "URL" : r"([http://]?[a-zA-Z\d\/]+[\.]+[a-zA-Z\d\/\.]+)" }
35+ "HASHTAG" : r"(#[\w\d]+)" , "URL" : r"([https://|http://]?[a-zA-Z\d\/]+[\.]+[a-zA-Z\d\/\.]+)" ,
36+ "SPACES" :r"\s+" }
1237 regexp = dict ((key , re .compile (value )) for key , value in regexp .items ())
1338
1439 def __init__ (self , timeline_owner , tweet ):
@@ -25,7 +50,8 @@ def __init__(self, timeline_owner, tweet):
2550 self .URLs = ParseTweet .getURLs (tweet )
2651 self .RT = ParseTweet .getAttributeRT (tweet )
2752 self .MT = ParseTweet .getAttributeMT (tweet )
28-
53+ self .Emoticon = ParseTweet .getAttributeEmoticon (tweet )
54+
2955 # additional intelligence
3056 if ( self .RT and len (self .UserHandles ) > 0 ): # change the owner of tweet?
3157 self .Owner = self .UserHandles [0 ]
@@ -36,6 +62,18 @@ def __str__(self):
3662 return "owner %s, urls: %d, hashtags %d, user_handles %d, len_tweet %d, RT = %s, MT = %s" % (
3763 self .Owner , len (self .URLs ), len (self .Hashtags ), len (self .UserHandles ), len (self .tweet ), self .RT , self .MT )
3864
65+ @staticmethod
66+ def getAttributeEmoticon (tweet ):
67+ """ see if tweet is contains any emoticons, +ve, -ve or neutral """
68+ emoji = list ()
69+ for tok in re .split (ParseTweet .regexp ["SPACES" ],tweet .strip ()):
70+ if tok in Emoticons .POSITIVE :
71+ emoji .append ( tok )
72+ continue
73+ if tok in Emoticons .NEGATIVE :
74+ emoji .append ( tok )
75+ return emoji
76+
3977 @staticmethod
4078 def getAttributeRT (tweet ):
4179 """ see if tweet is a RT """
0 commit comments