"""Google+ source class.
The Google+ API currently only returns public activities and comments, so the
Audience Targeting 'to' field is always set to @public.
https://developers.google.com/+/api/latest/activities/list#collection
"""
from __future__ import absolute_import, division, unicode_literals
from builtins import range
import datetime
import functools
import itertools
import json
import re
from . import appengine_config
from . import source
from apiclient.errors import HttpError
from apiclient.http import BatchHttpRequest
from oauth_dropins.webutil import util
# Maximum number of results to request from the activities.search API call.
SEARCH_MAX_RESULTS = 20

# Matches the JSON data blob that plus.google.com embeds in its HTML inside an
# AF_initDataCallback(...) script tag. The captured group is the raw (almost-)
# JSON array consumed by html_to_activities().
# NOTE(review): fragile by nature — depends on Google's internal page markup.
HTML_ACTIVITIES_RE = re.compile(
r"AF_initDataCallback\({key: *'ds:[^']*', *isError: *false *, *hash: *'[^']*', *data:function\(\){return *(\[\[[^[].+?)}}\);</script>",
re.DOTALL)
[docs]class GooglePlus(source.Source):
"""Google+ source class. See file docstring and Source class for details.
The Google+ API already exposes data in ActivityStreams format, so this is
mostly just a pass through.
"""
# silo domain and base URL, used by Source helpers to build links
DOMAIN = 'plus.google.com'
BASE_URL = 'https://plus.google.com/'
# human-readable silo name
NAME = 'Google+'
# HTML snippet for embedding a post.
# https://developers.google.com/+/web/embedded-post/
# %(url)s and %(content)s are filled in via %-formatting by the caller.
EMBED_POST = """
<script async defer type="text/javascript"
src="https://apis.google.com/js/platform.js"></script>
<div class="g-post" data-href="%(url)s">
<p><a href="%(url)s" target="_top">%(content)s</a></p>
</div>
"""
def __init__(self, auth_entity=None, access_token=None):
    """Constructor.

    Currently only auth_entity is actually used. TODO: implement access_token.

    Args:
      auth_entity: oauth-dropins.googleplus.GooglePlusAuth
      access_token: string OAuth access token
    """
    self.auth_entity = auth_entity
    self.access_token = access_token
def user_url(self, user_id):
    """Returns the URL of a user's Google+ profile page."""
    return 'https://{}/{}'.format(self.DOMAIN, user_id)
def get_actor(self, user_id=None):
    """Returns a user as a JSON ActivityStreams actor dict.

    Args:
      user_id: string id or username. Defaults to 'me', ie the current user.

    Returns:
      dict, ActivityStreams actor, or {} if no stored user JSON is available.

    Raises:
      GooglePlusAPIError
      NotImplementedError: if user_id is set to someone other than the
        authenticated user, since fetching arbitrary users isn't implemented.
    """
    if self.auth_entity:
        if user_id and user_id != self.auth_entity.key.id():
            # fixed typo in the error message: 'suported' -> 'supported'
            raise NotImplementedError("Fetching other users isn't supported yet.")
        if self.auth_entity.user_json:
            return self.postprocess_actor(json.loads(self.auth_entity.user_json))
    return {}
[docs] def get_activities_response(self, user_id=None, group_id=None, app_id=None,
activity_id=None, start_index=0, count=0,
etag=None, min_id=None, cache=None,
fetch_replies=False, fetch_likes=False,
fetch_shares=False, fetch_events=False,
fetch_mentions=False, search_query=None, **kwargs):
"""Fetches posts and converts them to ActivityStreams activities.
See method docstring in source.py for details. app_id is ignored.
Replies (comments), likes (+1s), and shares (reshares) each need an extra
API call per activity. The activity has total counts for them, though, so we
only make those calls when we know there's something to fetch.
https://developers.google.com/+/api/latest/comments/list
https://developers.google.com/+/api/latest/people/listByActivity
We also batch those calls into a single HTTP request, so there are at most
two HTTP requests total, one to get activities and optionally one to get new
responses.
https://developers.google.com/api-client-library/python/guide/batch
Mentions are not currently fetched or included because the API doesn't
explicitly support searching for them. It could be approximated, though:
https://github.com/snarfed/bridgy/issues/523#issuecomment-155523875
"""
# 'me' is the Google+ API's alias for the authenticated user
if user_id is None:
user_id = 'me'
http = self.auth_entity.http()
if etag:
# monkey patch the ETag header in because google-api-python-client doesn't
# support setting request headers yet:
# http://code.google.com/p/google-api-python-client/issues/detail?id=121
orig_request = http.request
def request_with_etag(*args, **kwargs):
kwargs.setdefault('headers', {}).update({'If-None-Match': etag})
return orig_request(*args, **kwargs)
http.request = request_with_etag
# https://developers.google.com/+/api/latest/activities
try:
if activity_id:
# fetch a single activity by id
call = self.auth_entity.api().activities().get(activityId=activity_id)
activities = [call.execute(http=http)]
elif search_query:
# NOTE(review): when count is 0 (which conventionally means "fetch all"
# per source.py), min(count, SEARCH_MAX_RESULTS) requests 0 results —
# confirm that's intended.
call = self.auth_entity.api().activities().search(
query=search_query, maxResults=min(count, SEARCH_MAX_RESULTS))
activities = call.execute(http=http).get('items', [])
else:
call = self.auth_entity.api().activities().list(
userId=user_id, collection='public', maxResults=count)
resp = call.execute(http=http)
activities = resp.get('items', [])
# pass the server's ETag back to the caller for conditional refetches
etag = resp.get('etag')
except HttpError as e:
if e.resp.status == 304: # Not Modified, from a matching ETag
activities = []
else:
raise
# batch get memcached counts of comments, likes, reshares for all activities.
# cache keys look like 'AGC <id>', 'AGL <id>', 'AGS <id>' — presumably
# Activity Google+ Comments / Likes / Shares.
cached = {}
if cache is not None:
keys = itertools.product(('AGC', 'AGL', 'AGS'), [a['id'] for a in activities])
cached = cache.get_multi('%s %s' % (prefix, id) for prefix, id in keys)
# only update the cache at the end, in case we hit an error before then
cache_updates = {}
# prepare batch API requests for comments, likes and reshares
# https://developers.google.com/api-client-library/python/guide/batch
batch = BatchHttpRequest()
for activity in activities:
# comments
id = activity['id']
num_replies = activity.get('object', {}).get('replies', {}).get('totalItems')
# only fetch comments when the activity has some and the cached count is stale
if fetch_replies and num_replies and num_replies != cached.get('AGC ' + id):
call = self.auth_entity.api().comments().list(activityId=id, maxResults=500)
# activity is bound as a default/partial kwarg below to avoid the
# late-binding closure pitfall inside this loop
def set_comments(req_id, resp, exc, activity=None):
obj = activity.get('object', {})
if exc is None:
obj['replies']['items'] = [
self.postprocess_comment(c) for c in resp['items']]
cache_updates['AGC ' + activity['id']] = obj['replies']['totalItems']
else:
# on error, drop the replies stub so callers don't see stale counts
obj.pop('replies', None)
code, body = util.interpret_http_exception(exc)
if code not in ('404', '500', '502', '504'): # these happen; ignore them
raise exc
batch.add(call, callback=functools.partial(set_comments, activity=activity))
# likes
if fetch_likes:
self.maybe_add_tags(batch, activity, cached, cache_updates,
'plusoners', 'like')
# reshares
if fetch_shares:
self.maybe_add_tags(batch, activity, cached, cache_updates,
'resharers', 'share')
# _requests is a private BatchHttpRequest attribute; used to skip an empty batch
if batch._requests:
batch.execute(http=http)
for a in activities:
self.postprocess_activity(a)
response = self.make_activities_base_response(activities)
response['etag'] = etag
if cache_updates and cache is not None:
cache.set_multi(cache_updates)
return response
def postprocess_activity(self, activity):
    """Massage G+'s ActivityStreams dialect into our dialect, in place.

    Args:
      activity: ActivityStreams activity dict.

    Returns:
      the same activity dict, modified in place.
    """
    obj = activity['object']
    obj['author'] = self.postprocess_actor(activity['actor'])
    # G+ only returns public objects, so audience targeting is always @public
    obj['to'] = [{'objectType': 'group', 'alias': '@public'}]
    # also convert id to tag URI
    activity['id'] = self.tag_uri(activity['id'])
    return activity
def postprocess_actor(self, actor):
    """Massage G+'s ActivityStreams dialect into our dialect, in place.

    Args:
      actor: ActivityStreams actor dict.

    Returns:
      the same actor dict, modified in place.
    """
    actor_id = actor.get('id')  # renamed local so we don't shadow builtin id()
    if actor_id:
        actor['id'] = self.tag_uri(actor_id)

    # G+ returns a list of typed URL objects in 'urls'; surface the first one
    # as the canonical 'url' when none is already set.
    urls = actor.get('urls')
    if urls and not actor.get('url'):
        actor['url'] = urls[0].get('value')

    return actor

user_to_actor = postprocess_actor
[docs] def html_to_activities(self, html):
"""Converts HTML from https://plus.google.com/ to ActivityStreams activities.
Args:
html: unicode string
Returns:
list of ActivityStreams activity dicts
"""
# extract JSON data blob
match = HTML_ACTIVITIES_RE.search(html)
if not match:
return []
html = match.group(1)
if not html:
return []
# insert placeholder nulls for omitted values, e.g. [,,,"x",,,] so that we
# can decode it as JSON. run twice to handle overlaps.
for i in range(2):
html = re.sub(r'([,[])\s*([],])', r'\1null\2', html)
# NOTE(review): the numeric indices below (d[8], d[21], d[69], ...) address
# Google's undocumented internal data arrays and were evidently reverse
# engineered; they may silently break if the page structure changes.
data = json.loads(html)[0][7]
data = [list(d[6].values())[0] for d in data if len(d) >= 7 and d[6]]
activities = []
for d in data:
id = self.tag_uri(d[8])
url = 'https://%s/%s' % (self.DOMAIN, d[21]) # d[132] is full url
# posix timestamp in ms
# NOTE(review): util.T is presumably the 'T' date/time separator passed to
# isoformat() — confirm against oauth_dropins.webutil.util
published = datetime.datetime.utcfromtimestamp(d[5] / 1000).isoformat(util.T) + 'Z'
if d[69] and len(d[69]) >= 2 and d[69][1] and d[69][1][0]:
# this is a like, reshare, etc
# 202 is evidently the internal code for a +1 (like); anything else is
# mapped to 'unknown'
verb = 'like' if d[69][0] == 202 else 'unknown'
profile = d[69][1][0]
actor = {
'id': self.tag_uri(profile[1]),
'url': profile[5],
'objectType': 'person',
'displayName': profile[0],
'image': {'url': profile[4]},
}
else:
# this is a direct post
verb = 'post'
actor = {
# more author details are in d[137]
'id': self.tag_uri(d[16]),
'url': self.user_url(d[16]),
'objectType': 'person',
'displayName': d[3],
'image': {'url': d[18]},
}
activity = {
'id': id,
'url': url,
'verb': verb,
'object': {
'id': id,
'url': url,
'objectType': 'note',
'content': d[20], # also in d[138] with different encoding
'published': published,
'updated': published,
},
'actor': actor,
}
# optional location metadata
loc = d[27]
if loc:
activity['location'] = {
'displayName': loc[2],
'url': loc[8],
'latitude': loc[0],
'longitude': loc[1],
}
# attachments (link previews etc), keyed by an opaque id; only the values
# are used
attachments = d[97] and d[97][8]
if attachments:
activity['object']['attachments'] = [{
'objectType': 'article',
'url': att[0],
'image': {'url': att[1]},
'displayName': att[2],
'content': att[3],
} for att in attachments.values()]
self.postprocess_object(activity['object'])
# Source.postprocess_activity handles the generic cleanup shared across silos
activities.append(super(GooglePlus, self).postprocess_activity(activity))
return activities