Monday, March 11, 2019

Use Python to migrate all posts from Google+ to BlogSpot (blogger)

Google+ is going to be closed down very soon.

I built a Python 3.7.2 script to migrate all posts from Google+ to BlogSpot (Blogger).

Below are the steps.

1. Download credentials.json from:

https://developers.google.com/blogger/docs/3.0/using

2. Log in to https://www.blogger.com, then get the BlogId from the URL

3. Download the Google+ backup, then uncompress it to a folder

4. Modify the "BLOGID", the post-file folder, and the credentials.json file path in the source code (highlighted)

5. Run the script

www.blogger.com only allows uploading around 700 posts per day, so we will get a "rateLimitExceeded" exception during the upload. This script will automatically retry uploading the file when that happens.

To differentiate these migrated posts from the original posts, I added ' - GooglePlus' to the end of the post title.

Photos are not migrated.

The source code is shared here.

================

import time
import logging
import os
import fnmatch
from html.parser import HTMLParser

from bs4 import BeautifulSoup
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

# Verbose log format: timestamp (with ms), level, source location, message.
logging.basicConfig(format='%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
                    datefmt='%Y-%m-%d:%H:%M:%S',
                    level=logging.DEBUG)

# FILE_FOLDER = 'C:\\EricFang\\Python3\\'
# Folder containing the uncompressed Google+ backup's per-post HTML files.
FILE_FOLDER = '\\\\pwdaddy\\Posts\\'
# OAuth client secrets file downloaded from the Google developer console.
FILE_CREDENTIALS = 'C:\\EricFang\\Python3\\AddPostsToBlogSpot\\credentials.json'
# Only files matching this glob are treated as posts.
FILE_PATTERN = '*.html'

# Blog id taken from the blogger.com URL after logging in (step 2 above).
BLOGID = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxx'
API_SERVICE_NAME = 'blogger'
API_VERSION = 'v3'

# https://developers.google.com/blogger/docs/3.0/using
# https://developers.google.com/identity/protocols/googlescopes
# SCOPES = ['https://www.googleapis.com/auth/blogger.readonly']
# Full read/write scope is needed to insert posts.
SCOPES = ['https://www.googleapis.com/auth/blogger']

# Interactive OAuth flow: run_local_server() opens a browser and spins up a
# local redirect server to capture the authorization code.
g_appflow = InstalledAppFlow.from_client_secrets_file(
    FILE_CREDENTIALS, SCOPES)
# NOTE(review): auth_url is never used afterwards; run_local_server() builds
# its own authorization URL — this line looks like leftover code.
auth_url, _ = g_appflow.authorization_url(prompt='consent')

g_creds = g_appflow.run_local_server()

# Blogger v3 API client shared by all functions below.
g_service = build(API_SERVICE_NAME, API_VERSION, credentials=g_creds)

# Running count of posts uploaded in this session (used for progress logs).
g_count_uploaded = 0


def ReadFile(fileContentText):
    """Extract the inner HTML of the 'main-content' div from a backup page.

    Args:
        fileContentText: full HTML text of one Google+ backup post file.

    Returns:
        The children of the <div class="main-content"> element serialized
        back into an HTML string, suitable as a Blogger post body.
    """
    soup = BeautifulSoup(fileContentText, features="html.parser")
    content_div = soup.body.find('div', attrs={'class': 'main-content'})
    return ''.join(str(child) for child in content_div.contents)


def addPost(postTitle, postContent):
    """Insert one post into the blog, retrying on rate limiting.

    Blogger caps uploads at roughly 700 posts/day; when the API raises a
    rateLimitExceeded error we sleep 60 seconds and try again. Any other
    error is logged and re-raised — the original code retried *every*
    exception forever, which would spin endlessly on e.g. an auth failure.

    Args:
        postTitle: title of the new post.
        postContent: HTML body of the new post.
    """
    postBody = {
        "kind": "blogger#post",
        # NOTE(review): per the Blogger v3 API, a post body's 'id' is the
        # *post* id, not the blog id — this field is probably ignored by
        # the server, but confirm before removing.
        "id": BLOGID,
        "title": postTitle,
        "content": postContent
    }
    global g_service
    while True:
        try:
            postsInsertAction = g_service.posts().insert(blogId=BLOGID, body=postBody,
                                                         isDraft=False)
            posts = postsInsertAction.execute()
            break
        except Exception as ex:
            # Only retry when the failure really is rate limiting; anything
            # else (bad credentials, malformed request, ...) must surface.
            if 'rateLimitExceeded' in str(ex):
                logging.info("rateLimitExceeded, wait for 60 seconds......")
                time.sleep(60)
            else:
                logging.exception("unexpected error while inserting post (%s)", postTitle)
                raise

    global g_count_uploaded
    g_count_uploaded += 1
    logging.info('%d, post(%s) is uploaded.', g_count_uploaded, postTitle)


def getPostTitleList():
    """Return a dict mapping existing post titles to their post ids.

    Pages through posts.list so that already-uploaded posts can be skipped
    on a re-run. The original loop issued its first in-loop request with a
    stale (None) page token — re-fetching and re-processing page one — and
    broke out *before* processing the items of the final fetched page; this
    version fetches and processes each page exactly once.
    """
    dictReturn = {}
    nextPageToken = None
    global g_service
    while True:
        postsListAction = g_service.posts().list(blogId=BLOGID, maxResults=20,
                                                 fetchBodies=False,
                                                 pageToken=nextPageToken)
        posts = postsListAction.execute()
        # 'items' is absent when the blog (or page) has no posts.
        for item in posts.get('items', []):
            dictReturn[item['title']] = item['id']
        nextPageToken = posts.get('nextPageToken')
        if not nextPageToken:
            break
    return dictReturn


def main():
    """Upload every *.html file in FILE_FOLDER as a new blog post.

    Files whose derived title already exists on the blog are skipped, so
    the script can safely be re-run after a rate-limit interruption.
    """
    postTitleList = getPostTitleList()

    for fileName in os.listdir(FILE_FOLDER):
        # Idiomatic negation instead of '== False'.
        if not fnmatch.fnmatch(fileName, FILE_PATTERN):
            logging.info('file (%s) is not valid. skip.', fileName)
            continue

        # ' - GooglePlus' suffix marks migrated posts apart from originals.
        postTitle = fileName.replace('.html', ' - GooglePlus')
        if postTitle in postTitleList:
            logging.info('post (%s) exists. skip.', postTitle)
            continue

        logging.debug("file name: %s", fileName)
        with open(os.path.join(FILE_FOLDER, fileName), mode='r', encoding='UTF-8') as f:
            fileContent = f.read()

        fileContent = ReadFile(fileContentText=fileContent)

        addPost(postTitle=postTitle, postContent=fileContent)

    logging.info('completed!')


# Script entry point: run the migration only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()

No comments:

Post a Comment