"""Bluesky source class.
https://bsky.app/
https://atproto.com/lexicons/app-bsky-actor
https://github.com/bluesky-social/atproto/tree/main/lexicons/app/bsky
"""
import copy
import logging
import urllib.parse
from granary.source import Source, OMIT_LINK
from oauth_dropins.webutil import util
[docs]def url_to_did_web(url):
"""Converts a URL to a did:web.
Examples:
* 'https://foo.com' => 'did:web:foo.com'
* 'https://foo.com:3000' => 'did:web:foo.com%3A3000'
* 'https://bar.com/baz/baj' => 'did:web:bar.com:baz:baj'
https://w3c-ccg.github.io/did-method-web/#example-creating-the-did
TODO: require https?
Args:
url: str
Returns: str
"""
parsed = urllib.parse.urlparse(url)
if not parsed.netloc:
raise ValueError(f'Invalid URL: {url}')
did = f'did:web:{urllib.parse.quote(parsed.netloc)}'
if parsed.path:
did += f'{parsed.path.replace("/", ":")}'
return did.strip(':')
[docs]def did_web_to_url(did):
"""Converts a did:web to a URL.
Examples:
* 'did:web:foo.com' => 'https://foo.com'
* 'did:web:foo.com%3A3000' => 'https://foo.com:3000'
* 'did:web:bar.com:baz:baj' => 'https://bar.com/baz/baj'
https://w3c-ccg.github.io/did-method-web/#read-resolve
Args:
did: str
Returns: str
"""
if not did or not did.startswith('did:web:'):
raise ValueError(f'Invalid did:web: {did}')
did = did.removeprefix('did:web:')
if ':' in did:
host, path = did.split(':', 1)
else:
host = did
path = ''
host = urllib.parse.unquote(host)
path = urllib.parse.unquote(path.replace(':', '/'))
return f'https://{host}/{path}'
[docs]def from_as1(obj, from_url=None):
"""Converts an AS1 object to a Bluesky object.
The objectType field is required.
Args:
obj: dict, AS1 object or activity
from_url: str, optional URL the original object was fetched from.
Currently unused. TODO: remove?
Returns: dict, app.bsky.* object
Raises:
ValueError
if the objectType or verb fields are missing or unsupported
"""
activity = obj
verb = activity.get('verb') or 'post'
inner_obj = activity.get('object')
if inner_obj and verb == 'post':
obj = inner_obj
type = obj.get('objectType') or 'note'
actor = activity.get('actor')
# TODO: once we're on Python 3.10, switch this to a match statement!
if type == 'person':
# banner is featured image, if available
banner = None
for img in util.get_list(obj, 'image'):
url = img.get('url')
if img.get('objectType') == 'featured' and url:
banner = url
break
url = util.get_url(obj) or obj.get('id') or ''
try:
did_web = url_to_did_web(url)
except ValueError as e:
logging.info(f"Couldn't generate did:web: {e}")
did_web = ''
# handle is username@domain or domain/path, no scheme or query
username = obj.get('username')
parsed = urllib.parse.urlparse(url)
domain = parsed.netloc
if username:
handle = username
if domain:
handle += f'@{domain}'
elif url:
handle = domain
if parsed.path not in ('', '/'):
handle += parsed.path
else:
handle = ''
ret = {
'$type': 'app.bsky.actor.profile',
'displayName': obj.get('displayName'),
'description': obj.get('summary'),
'avatar': util.get_url(obj, 'image'),
'banner': banner,
'did': did_web,
# this is a DID
# atproto/packages/pds/src/api/app/bsky/actor/getProfile.ts#38
'creator': did_web,
'declaration': {
'$type': 'app.bsky.system.declRef',
# Content ID, aka content-hash fingerprint. Immutable hash that
# identifies a node in a PDS.
# https://atproto.com/guides/applications#record-types
# https://github.com/multiformats/cid
# https://atproto.com/guides/data-repos#data-layout
# atproto/lexicons/com/atproto/repo/strongRef.json
'cid': 'TODO',
'actorType': 'app.bsky.system.actorUser',
},
# TODO: should be more specific than domain, many users will be on shared
# domains
'handle': handle,
'followersCount': 0,
'followsCount': 0,
'membersCount': 0,
'postsCount': 0,
}
elif verb == 'share':
ret = from_as1(inner_obj)
ret['reason'] = {
'$type': 'app.bsky.feed.feedViewPost#reasonRepost',
'by': actor_to_ref(actor),
'indexedAt': util.now().isoformat(),
}
elif verb == 'follow':
assert actor
ret = {
'$type': 'app.bsky.graph.follow',
'subject': actor.get('id') or actor.get('url'),
'createdAt': obj.get('published', ''),
}
elif verb == 'post' and type in ('article', 'mention', 'note', 'comment'):
# convert text to HTML and truncate
src = Bluesky()
text = src._content_for_create(obj)
is_html = text != (obj.get('summary') or obj.get('content') or obj.get('name'))
text = src.truncate(text, None, OMIT_LINK)
# text tags
entities = []
content = obj.get('content')
for tag in util.get_list(obj, 'tags'):
url = tag.get('url')
if url:
try:
start = int(tag.get('startIndex'))
if is_html and start:
raise NotImplementedError('HTML content is not supported with index tags')
end = start + int(tag.get('length'))
tag_text = content[start:end] if content else None
except (ValueError, IndexError):
tag_text = start = end = None
entities.append({
'type': 'link',
'value': url,
'text': tag_text,
'index': {
'start': start,
'end': end,
},
})
# images
post_embed = record_embed = None
images = util.get_list(obj, 'image')
if images:
post_embed = {
'$type': 'app.bsky.embed.images#presented',
'images': [{
'$type': 'app.bsky.embed.images#presentedImage',
'thumb': img.get('url'),
'fullsize': img.get('url'),
'alt': img.get('displayName'),
} for img in images[:4]],
}
record_embed = {
'$type': 'app.bsky.embed.images',
'images': [{
'$type': 'app.bsky.embed.images#image',
'image': img.get('url'),
'alt': img.get('displayName'),
} for img in images[:4]],
}
elif entities:
post_embed = {
'$type': 'app.bsky.embed.external#presented',
'external': [{
'$type': 'app.bsky.embed.external#presentedExternal',
'uri': entity['value'],
'title': entity['text'],
'description': '',
} for entity in entities],
}
record_embed = {
'$type': 'app.bsky.embed.external',
'external': [{
'$type': 'app.bsky.embed.external#external',
'uri': entity['value'],
'title': entity['text'],
'description': '',
} for entity in entities],
}
author = obj.get('author')
ret = {
'$type': 'app.bsky.feed.feedViewPost',
'post': {
'$type': 'app.bsky.feed.post#view',
'uri': util.get_url(obj),
'cid': 'TODO',
'record': {
'$type': 'app.bsky.feed.post',
'text': text,
'createdAt': obj.get('published', ''),
'embed': record_embed,
'entities': entities,
},
'author': actor_to_ref(author) if author else None,
'embed': post_embed,
'replyCount': 0,
'repostCount': 0,
'upvoteCount': 0,
'downvoteCount': 0,
'indexedAt': util.now().isoformat(),
'viewer': {},
},
}
in_reply_to = util.get_url(obj, 'inReplyTo')
if in_reply_to:
ret['post']['record']['reply'] = {
'$type': 'app.bsky.feed.post#replyRef',
'root': {
'$type': 'com.atproto.repo.strongRef',
'uri': in_reply_to,
'cid': 'TODO',
},
'parent': {
'$type': 'com.atproto.repo.strongRef',
'uri': in_reply_to,
'cid': 'TODO',
},
}
else:
raise ValueError(f'AS1 object has unknown objectType {type} or verb {verb}')
# keep some fields that are required by lexicons
return util.trim_nulls(ret, ignore=(
'createdAt',
'description',
'did',
'handle',
'text',
'viewer',
))
[docs]def actor_to_ref(actor):
"""Converts an AS1 actor to a Bluesky `app.bsky.actor.ref#withInfo`.
Args:
actor: dict, AS1 actor
Returns: dict, `app.bsky.actor.ref#withInfo` object
"""
if not actor:
return None
actor = copy.deepcopy(actor)
actor.setdefault('objectType', 'person')
ref = {
k: v for k, v in from_as1(actor).items()
if k in ('avatar', 'declaration', 'did', 'displayName', 'handle', 'indexedAt')
}
ref['$type'] = 'app.bsky.actor.ref#withInfo'
return ref
[docs]def to_as1(obj):
"""Converts a Bluesky object to an AS1 object.
The $type field is required.
Args:
profile: dict, app.bsky.* object
Returns: dict, AS1 object
Raises:
ValueError
if the $type field is missing or unsupported
"""
if not obj:
return {}
type = obj.get('$type')
if not type:
raise ValueError('Bluesky object missing $type field')
# TODO: once we're on Python 3.10, switch this to a match statement!
if type in ('app.bsky.actor.profile', 'app.bsky.actor.ref#withInfo'):
images = [{'url': obj.get('avatar')}]
banner = obj.get('banner')
if banner:
images.append({'url': obj.get('banner'), 'objectType': 'featured'})
did = obj.get('did')
ret = {
'objectType': 'person',
'displayName': obj.get('displayName'),
'summary': obj.get('description'),
'image': images,
'url': did_web_to_url(did) if did else None,
}
elif type == 'app.bsky.feed.post':
tags = []
for entity in obj.get('entities', []):
if entity.get('type') == 'link':
index = entity.get('index')
start = index.get('start', 0)
end = index.get('end', 0)
tags.append({
'url': entity.get('value'),
'startIndex': start,
'length': end - start,
})
in_reply_to = obj.get('reply', {}).get('parent', {}).get('uri')
ret = {
'objectType': 'comment' if in_reply_to else 'note',
'content': obj.get('text', ''),
'inReplyTo': [{'url': in_reply_to}],
'published': obj.get('createdAt', ''),
'tags': tags,
}
elif type == 'app.bsky.feed.post#view':
ret = to_as1(obj.get('record'))
ret.update({
'url': obj.get('uri'),
'author': to_as1(obj.get('author')),
'image': to_as1(obj.get('embed')),
})
elif type == 'app.bsky.embed.images#presented':
ret = [{
'url': img.get('fullsize'),
'displayName': img.get('alt'),
} for img in obj.get('images', [])]
elif type == 'app.bsky.feed.feedViewPost':
ret = to_as1(obj.get('post'))
reason = obj.get('reason')
if reason and reason.get('$type') == 'app.bsky.feed.feedViewPost#reasonRepost':
ret = {
'objectType': 'activity',
'verb': 'share',
'object': ret,
'actor': to_as1(reason.get('by')),
}
elif type == 'app.bsky.graph.follow':
ret = {
'objectType': 'activity',
'verb': 'follow',
'actor': {
'url': obj.get('subject'),
},
}
else:
raise ValueError(f'Bluesky object has unknown $type: {type}')
return util.trim_nulls(ret)
[docs]class Bluesky(Source):
"""Bluesky source class. See file docstring and Source class for details."""
DOMAIN = 'bsky.app'
BASE_URL = 'https://bsky.app'
NAME = 'Bluesky'
TRUNCATE_TEXT_LENGTH = 256 # TODO: load from feed.post lexicon