« Zpět
Prohlížíte si socialwebmining_python.html. Obsah níže zobrazeného souboru byl profiltrován, aby neobsahoval případný škodlivý obsah a nemusí zcela odpovídat původnímu originálu. Stáhnout původní verzi
In [ ]:
# Attribution list (note the double "l" in "Russell").
__credits__ = ["Matthew Russell", "Kunal Jain"]
In [ ]:
import json
# alternativou je napr. skvela knihovna requests: http://www.python-requests.org/en/latest/
import httplib2

# Pretty-printer that keeps non-ASCII (e.g. Czech) text readable in the output.
def pp(o):
    """Print *o* as indented JSON without escaping non-ASCII characters.

    Args:
        o: Any JSON-serialisable object (dict, list, str, number, ...).

    Returns:
        None. The JSON text is written to standard output, so call this
        function directly instead of wrapping it in ``print``.

    Raises:
        TypeError: If *o* contains a value ``json.dumps`` cannot serialise.
    """
    text = json.dumps(o, indent=1, ensure_ascii=False)
    if not isinstance(text, str):
        # Python 2 only: json.dumps returned a ``unicode`` object. Encode it
        # explicitly so printing does not depend on the stdout encoding.
        # A Python 2 byte ``str`` and a Python 3 ``str`` are printed as-is.
        text = text.encode('utf8')
    print(text)

# Values are read from environment variables so that credentials are not
# broadcast to the world while the notebook is being presented.
import os
# Parenthesised single-argument form runs on both Python 2 and Python 3.
print(os.environ.get('HOME'))

Facebook

In [ ]:
# https://developers.facebook.com/tools/explorer/
# Obtaining the access token can be automated with a Facebook app.
# Left empty here; fill in a token before running the Facebook cells.
ACCESS_TOKEN = ''
In [ ]:
# pip install facebook-sdk
import facebook

# Graph API client created with ACCESS_TOKEN.
g = facebook.GraphAPI(ACCESS_TOKEN)
In [ ]:
# PedF UK (Facebook object id)
o_id = "426359397423041"
# pp() prints the JSON itself and returns None, so it is called directly;
# wrapping it in a print statement would emit an extra "None" line.
pp(g.get_object(o_id))
In [ ]:
# Facebook Open Graph: look up the object for a URL and pretty-print it.
pp(g.get_object('http://spomocnik.rvp.cz/'))
In [ ]:

Google+

In [ ]:
# https://console.developers.google.com
# The API key is read from the GAPI_KEY environment variable; the lookup
# raises KeyError when the variable is not set.
GAPI_KEY = os.environ['GAPI_KEY']
In [ ]:
# pip install google-api-python-client
import apiclient.discovery
In [ ]:
# Search term for the Google+ people search.
q = "Udacity"
In [ ]:
# Build a client for the 'plus' API (v1) and search people matching q.
service = apiclient.discovery.build('plus', 'v1', http=httplib2.Http(), developerKey=GAPI_KEY)
people_feed = service.people().search(query=q).execute()
# pp() prints the JSON itself and returns None, so it is called directly;
# wrapping it in a print statement would emit an extra "None" line.
pp(people_feed['items'])
In [ ]:
# Udacity (Google+ profile id)
o_id = '116286004036789369492'

service = apiclient.discovery.build('plus', 'v1', http=httplib2.Http(), developerKey=GAPI_KEY)

# Request the public activities of the profile, up to 100 results.
# maxResults is an integer parameter, so it is passed as an int.
activity_feed = service.activities().list(
    userId=o_id,
    collection='public',
    maxResults=100,
).execute()

# pp() prints the JSON itself and returns None, so it is called directly;
# wrapping it in a print statement would emit an extra "None" line.
pp(activity_feed)

Twitter

In [ ]:
# pip install twitter
# alternativou je napr. skvela knihovna tweepy: https://github.com/tweepy/tweepy
import twitter
In [ ]:
# https://apps.twitter.com/
# All four OAuth credentials are read from environment variables; each
# lookup raises KeyError when its variable is not set.
CONSUMER_KEY = os.environ['CONSUMER_KEY']
CONSUMER_SECRET = os.environ['CONSUMER_SECRET']
OAUTH_TOKEN = os.environ['OAUTH_TOKEN']
OAUTH_TOKEN_SECRET = os.environ['OAUTH_TOKEN_SECRET']

auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                           CONSUMER_KEY, CONSUMER_SECRET)

twitter_api = twitter.Twitter(auth=auth)

# Show the client object. The parenthesised single-argument form runs on
# both Python 2 and Python 3.
print(twitter_api)
In [ ]:
# https://dev.twitter.com/docs/api/1.1/get/trends/place and
# http://developer.yahoo.com/geo/geoplanet/
WORLD_WOE_ID = 1
US_WOE_ID = 23424977

world_trends = twitter_api.trends.place(_id=WORLD_WOE_ID)
# pp() prints the JSON itself and returns None, so it is called directly;
# wrapping it in a print statement would emit an extra "None" line.
pp(world_trends)
In [ ]:
# Search query and the number of tweets requested.
q = '#education'
count = 100

search_results = twitter_api.search.tweets(q=q, count=count)
statuses = search_results['statuses']

# pp() prints the JSON itself and returns None, so it is called directly;
# wrapping it in a print statement would emit an extra "None" line.
pp(statuses)
In [ ]:
# Text of every status returned by the search.
status_texts = [status['text'] for status in statuses]

# Screen names of all users mentioned in the statuses.
screen_names = [
    user_mention['screen_name']
    for status in statuses
    for user_mention in status['entities']['user_mentions']
]

# Hashtag texts used in the statuses.
hashtags = [
    hashtag['text']
    for status in statuses
    for hashtag in status['entities']['hashtags']
]

# pp() prints the JSON itself and returns None, so it is called directly;
# wrapping it in a print statement would emit an extra "None" line.
pp(hashtags)
In [ ]:
# Most popular retweets
# pip install prettytable
from prettytable import PrettyTable

# One (retweet count, screen name of the retweeted status' user, text) tuple
# for every status that carries a 'retweeted_status' entry.
# The ``in`` operator replaces dict.has_key(), which Python 3 removed.
retweets = [
    (status['retweet_count'],
     status['retweeted_status']['user']['screen_name'],
     status['text'])
    for status in statuses
    if 'retweeted_status' in status
]

# The 5 most popular retweets: tuples sort by their first element (the
# count) first, and reverse=True puts the highest counts on top.
pt = PrettyTable(field_names=['Count', 'Screen Name', 'Text'])
for row in sorted(retweets, reverse=True)[:5]:
    pt.add_row(row)
pt.max_width['Text'] = 50
pt.align = 'l'
# Parenthesised single-argument form runs on both Python 2 and Python 3.
print(pt)
In [ ]:
# Users who retweeted the given status.
_retweets = twitter_api.statuses.retweets(id=563448349639340032)
# Parenthesised single-argument form runs on both Python 2 and Python 3.
print([r['user']['screen_name'] for r in _retweets])

YouTube

In [ ]:
#pip install google-api-python-client
from apiclient.discovery import build
#pip install google-api-python-client
from apiclient.errors import HttpError
#pip install pandas
import pandas as pd
In [ ]:
# The API key is read from the DEVELOPER_KEY environment variable; the
# lookup raises KeyError when the variable is not set.
DEVELOPER_KEY = os.environ['DEVELOPER_KEY']
# Service name and version handed to build() for the YouTube client.
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
In [ ]:
# Search query and the maxResults value for the YouTube search request.
myq = 'Khan Academy'
max_results = 25
In [ ]:
# Client for the YouTube API, authenticated with the developer key.
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)

# Search for videos matching myq, requesting the id and snippet parts.
search_response = youtube.search().list(
                                        q=myq,
                                        type="video",
                                        part="id,snippet",
                                        maxResults=max_results
                                        ).execute()

# Bare expression: the notebook displays the raw response as the cell output.
search_response
In [ ]:
# Maps video id -> title for every search result that is a video.
videos = {}

# Filter out channels and playlists.
for search_result in search_response.get("items", []):
    if search_result["id"]["kind"] == "youtube#video":
        videos[search_result["id"]["videoId"]] = search_result["snippet"]["title"]

# Joining a dict iterates over its keys, so this lists the video ids.
# A single concatenated argument keeps the call valid on Python 2 and 3.
print("Videos:\n" + "\n".join(videos) + "\n")
# Comma-separated video ids for the follow-up statistics request.
s = ','.join(videos)
In [ ]:
# Fetch the statistics of all collected videos with a single request.
videos_list_response = youtube.videos().list(
    id=s,
    part='id,statistics'
).execute()
res = []

# Build one flat record per video: id, title and its statistics counters.
for item in videos_list_response['items']:
    temp_res = dict(v_id=item['id'], v_title=videos[item['id']])
    # The counters arrive as JSON strings (e.g. "123"); convert them to int
    # so that later numeric summaries and plots treat them as numbers.
    temp_res.update(
        (name, int(value)) for name, value in item['statistics'].items()
    )
    res.append(temp_res)
In [ ]:
# Store the records in a DataFrame and show its summary statistics.
myDataFrame = pd.DataFrame.from_dict(res)
# Bare expression: the notebook displays the summary as the cell output.
myDataFrame.describe()
In [ ]:
# Bare expression: the notebook displays the whole DataFrame as the cell output.
myDataFrame
In [ ]:
# pip install matplotlib
# http://matplotlib.org/
# For publication-quality visualisation a JavaScript library is recommended
# (professionals reach for http://d3js.org/)
# ...but matplotlib is excellent for data exploration.
import matplotlib.pyplot as plt
# Cast to float so the counts are plotted on a numeric axis even when the
# column holds the strings returned by the API; float also tolerates NaN.
plt.scatter(myDataFrame.index, myDataFrame['viewCount'].astype(float))
plt.show()

Instagram

In [ ]:
# pip install python-instagram
# https://github.com/Instagram/python-instagram

Snapchat bot

NetworkX

In [ ]:
# pip install networkx
# https://networkx.github.io/