I built a Python 3.7.2 script to migrate all posts from Google+ to BlogSpot (Blogger).
Below are the steps.
1. Download credentials.json from:
https://developers.google.com/blogger/docs/3.0/using
2. Log in to https://www.blogger.com and get the BlogId from the URL
3. Download the Google+ backup, then uncompress it to a folder
4. Modify "BLOGID", the post files folder, and the credentials.json file path in the source code (highlighted)
5. Run the script
www.blogger.com only allows uploading around 700 posts per day, so we will get a "Rate Limit Exceeded" exception during the upload. This script will automatically retry uploading the file when that happens.
To differentiate these migrated posts from the original posts, I added ' - GooglePlus' to the end of the post title.
Photos are not migrated.
The source code is shared here.
================
import time
import logging
import os
import fnmatch
from html.parser import HTMLParser
from bs4 import BeautifulSoup
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
# ---------------------------------------------------------------------------
# Module-level configuration and OAuth setup.
# NOTE: everything below runs at import time, including the OAuth flow.
# ---------------------------------------------------------------------------

# Root logger: timestamped records with file name and line number, at DEBUG
# level so every skip/upload decision made by the script is visible.
logging.basicConfig(format='%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
datefmt='%Y-%m-%d:%H:%M:%S',
level=logging.DEBUG)
# Folder that main() lists to find the exported post files. The value ends
# with a path separator because main() builds paths by concatenation.
# FILE_FOLDER = 'C:\\EricFang\\Python3\\'
FILE_FOLDER = '\\\\pwdaddy\\Posts\\'
# OAuth client-secrets file handed to InstalledAppFlow below.
FILE_CREDENTIALS = 'C:\\EricFang\\Python3\\AddPostsToBlogSpot\\credentials.json'
# fnmatch pattern a file name must match for main() to treat it as a post.
FILE_PATTERN = '*.html'
# Placeholder: replace with the id of the target blog. Passed as blogId to
# every Blogger API call in this script.
BLOGID = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxx'
# Service name and version passed to googleapiclient.discovery.build().
API_SERVICE_NAME = 'blogger'
API_VERSION = 'v3'
# https://developers.google.com/blogger/docs/3.0/using
# https://developers.google.com/identity/protocols/googlescopes
# The read-only scope is kept for reference; the full 'blogger' scope is the
# one in use (presumably required because the script inserts posts).
# SCOPES = ['https://www.googleapis.com/auth/blogger.readonly']
SCOPES = ['https://www.googleapis.com/auth/blogger']
# Build the OAuth flow from the client-secrets file for the scopes above.
g_appflow = InstalledAppFlow.from_client_secrets_file(
FILE_CREDENTIALS, SCOPES)
# NOTE(review): auth_url is not used anywhere in the visible code.
auth_url, _ = g_appflow.authorization_url(prompt='consent')
# Obtain user credentials through the flow's local-server authorization step.
g_creds = g_appflow.run_local_server()
# Shared Blogger API client used by addPost() and getPostTitleList().
g_service = build(API_SERVICE_NAME, API_VERSION, credentials=g_creds)
# Running count of uploaded posts; incremented by addPost() after each upload.
g_count_uploaded = 0
def ReadFile(fileContentText):
    """Return the inner HTML of the post's ``main-content`` div.

    The text is parsed with BeautifulSoup's "html.parser" backend, the first
    ``<div class="main-content">`` under ``<body>`` is located, and its child
    nodes are serialized and concatenated into one string. The wrapping div
    itself is not part of the result.

    Args:
        fileContentText: Full HTML text of one exported post file.

    Returns:
        The concatenated markup of the div's children.

    Raises:
        AttributeError: If the document has no ``<body>`` or no matching
            div, because the lookup then yields ``None``.
    """
    soup = BeautifulSoup(fileContentText, features="html.parser")
    main_div = soup.body.find('div', attrs={'class': 'main-content'})
    # Serialize child by child so only the div's contents are returned.
    return ''.join(str(child) for child in main_div.contents)
def addPost(postTitle, postContent):
    """Publish one post to the blog, waiting out quota and transient errors.

    The post is inserted (not as a draft) into the blog identified by
    ``BLOGID`` using the module-level ``g_service`` client. When the API
    reports a rate/quota limit, a server-side error, or a non-HTTP failure
    (e.g. a dropped connection), the upload is retried every 60 seconds.
    Other 4xx responses are permanent client errors (bad credentials,
    unknown blog, rejected content) that retrying cannot fix, so they are
    logged and re-raised instead of looping forever.

    Args:
        postTitle: Title of the new post.
        postContent: HTML content of the new post.

    Raises:
        googleapiclient.errors.HttpError: For a 4xx response that is not a
            rate/quota limit.

    Side effects:
        Increments the module-level ``g_count_uploaded`` after a successful
        upload and logs the running count.
    """
    # Local import keeps this block self-contained; googleapiclient is
    # already a dependency of this script.
    from googleapiclient.errors import HttpError

    global g_service
    global g_count_uploaded

    # The Posts resource names its blog via "blog.id"; a top-level "id"
    # would be the post's own id, which the server assigns on insert.
    postBody = {
        "kind": "blogger#post",
        "blog": {"id": BLOGID},
        "title": postTitle,
        "content": postContent,
    }

    while True:
        try:
            g_service.posts().insert(
                blogId=BLOGID, body=postBody, isDraft=False).execute()
            break
        except HttpError as ex:
            status = int(ex.resp.status)
            detail = ex.content
            if isinstance(detail, bytes):
                detail = detail.decode('utf-8', 'replace')
            detail = str(detail).lower()
            # Covers rateLimitExceeded, userRateLimitExceeded,
            # dailyLimitExceeded and quotaExceeded error reasons.
            quotaHit = (status == 429
                        or 'limitexceeded' in detail
                        or 'quotaexceeded' in detail)
            if 400 <= status < 500 and not quotaHit:
                logging.error("post(%s) rejected with HTTP %s: %s",
                              postTitle, status, ex)
                raise
            if quotaHit:
                logging.info("rateLimitExceeded, wait for 60 seconds......")
            else:
                logging.warning(
                    "HTTP %s while uploading (%s), wait for 60 seconds......",
                    status, ex)
            time.sleep(60)
        except Exception as ex:
            # Best-effort retry for non-HTTP failures, but record the real
            # cause so a persistent problem is visible in the log.
            logging.warning(
                "upload failed (%r), wait for 60 seconds......", ex)
            time.sleep(60)

    g_count_uploaded = g_count_uploaded + 1
    logging.info('%s, post(%s) is uploaded.', g_count_uploaded, postTitle)
def getPostTitleList():
    """Return a dict mapping every existing post title to its post id.

    Pages through ``posts().list`` for the blog identified by ``BLOGID``
    (20 posts per request, bodies omitted) using the module-level
    ``g_service`` client. Each page is fetched exactly once and its items
    are recorded before the next page is requested, so the final page is
    included. A response without an ``items`` key (presumably what an empty
    blog returns) yields an empty dict instead of raising ``KeyError``.

    Returns:
        dict: ``{title: post_id}``. If several posts share a title, the one
        listed last wins.
    """
    global g_service
    dictReturn = {}
    nextPageToken = None
    while True:
        postsListAction = g_service.posts().list(
            blogId=BLOGID, maxResults=20,
            fetchBodies=False, pageToken=nextPageToken)
        posts = postsListAction.execute()
        # 'items' may be absent from a response that carries no posts.
        for item in posts.get('items', []):
            dictReturn[item['title']] = item['id']
        # A missing token marks the last page.
        nextPageToken = posts.get('nextPageToken')
        if not nextPageToken:
            break
    return dictReturn
def main():
    """Upload every exported post file that is not already on the blog.

    Lists ``FILE_FOLDER``, keeps regular files whose name matches
    ``FILE_PATTERN``, and derives each post title from the file name with
    its extension removed plus the ' - GooglePlus' suffix. Files whose title
    already exists on the blog (per ``getPostTitleList``) are skipped, which
    makes the script safe to re-run after an interruption. The remaining
    files are read as UTF-8, reduced to their main content by ``ReadFile``
    and published with ``addPost``.
    """
    postTitleList = getPostTitleList()
    for fileName in os.listdir(FILE_FOLDER):
        filePath = os.path.join(FILE_FOLDER, fileName)
        # Skip names that do not match and matching entries that are not
        # regular files (e.g. a directory called "x.html").
        if not fnmatch.fnmatch(fileName, FILE_PATTERN) \
                or not os.path.isfile(filePath):
            logging.info('file (%s) is not valid. skip.', fileName)
            continue
        # Strip only the trailing extension, whatever its letter case, so
        # the title agrees with fnmatch's platform-dependent case handling
        # and a '.html' elsewhere in the name is left alone.
        postTitle = os.path.splitext(fileName)[0] + ' - GooglePlus'
        if postTitle in postTitleList:
            logging.info('post (%s) exists. skip.', postTitle)
            continue
        logging.debug("file name: %s", fileName)
        with open(filePath, mode='r', encoding='UTF-8') as f:
            fileContent = f.read()
        fileContent = ReadFile(fileContentText=fileContent)
        addPost(postTitle=postTitle, postContent=fileContent)
    logging.info('completed!')


# Run the migration only when executed as a script.
if __name__ == '__main__':
    main()
No comments:
Post a Comment