Monday, March 11, 2019

Use Python to migrate all posts from Google+ to BlogSpot (blogger)

Google+ is going to be closed down very soon.

I wrote a Python 3.7.2 script to migrate all posts from Google+ to BlogSpot (Blogger).

Below are the steps.

1. Download credentials.json (the OAuth client secrets file) from the Google API Console.

2. Log in to Blogger and get the BlogId from the URL.

3. Download the Google+ backup, then uncompress it to a folder.

4. Modify the "BLOGID" value, the post files folder path, and the credentials.json file path in the source code (highlighted).

5. Run the script. Blogger only allows uploading around 700 posts per day, so we will get a "Rate Limit Exceeded" exception during the upload. The script automatically retries uploading the file when that happens.

To differentiate these migrated posts from the original posts, I added ' - GooglePlus' to the end of the post title.

Photos are not migrated.

The source code is shared here.


import time
import logging
import os
import fnmatch
from html.parser import HTMLParser

from bs4 import BeautifulSoup
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

# Log format: timestamp, level, source location, message.
# NOTE(review): datefmt and level are reconstructed -- the original call was
# truncated. A datefmt without milliseconds is supplied because the format
# string appends %(msecs)d itself; confirm the intended log level.
logging.basicConfig(format='%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
                    datefmt='%Y-%m-%d:%H:%M:%S',
                    level=logging.DEBUG)

# Folder holding the exported Google+ post files, and the OAuth client
# secrets file. Edit both for your environment.
# FILE_FOLDER = 'C:\\EricFang\\Python3\\'
FILE_FOLDER = '\\\\pwdaddy\\Posts\\'
FILE_CREDENTIALS = 'C:\\EricFang\\Python3\\AddPostsToBlogSpot\\credentials.json'
FILE_PATTERN = '*.html'

# Target blog and the API used to reach it.
BLOGID = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxx'
API_SERVICE_NAME = 'blogger'
# NOTE(review): API_VERSION was referenced below but never defined in the
# visible source; 'v3' is the current Blogger API version -- confirm.
API_VERSION = 'v3'

# OAuth scope granting read/write access to the user's Blogger account.
# NOTE(review): the scope URL was blank in the visible source; an empty scope
# cannot authorize posts.insert -- confirm this is the intended scope.
SCOPES = ['https://www.googleapis.com/auth/blogger']

# Build the OAuth flow from the client secrets file and run the interactive
# consent step to obtain user credentials.
g_appflow = InstalledAppFlow.from_client_secrets_file(FILE_CREDENTIALS, scopes=SCOPES)
auth_url, _ = g_appflow.authorization_url(prompt='consent')

g_creds = g_appflow.run_local_server()

# Shared Blogger API client used by addPost() and getPostTitleList().
g_service = build(API_SERVICE_NAME, API_VERSION, credentials=g_creds)

# Running count of posts uploaded by addPost().
g_count_uploaded = 0

def ReadFile(fileContentText):
    """Return the inner HTML of the ``main-content`` div of a post file.

    The text is parsed with BeautifulSoup's ``html.parser`` backend, the
    first ``<div class="main-content">`` under ``<body>`` is located, and
    its children are serialized back into a single string.

    Args:
        fileContentText: Full HTML text of one exported post file.

    Returns:
        The concatenated markup of the div's children (the div wrapper
        itself is not included).

    Raises:
        ValueError: If the document has no ``<body>`` or no ``main-content``
            div. Previously this surfaced as an opaque ``AttributeError``
            on ``None``.
    """
    parsed_html = BeautifulSoup(fileContentText, features="html.parser")

    # Both ``.body`` and ``.find()`` yield None when nothing matches, so
    # check each step before dereferencing.
    body = parsed_html.body
    htmlBody = None
    if body is not None:
        htmlBody = body.find('div', attrs={'class': 'main-content'})
    if htmlBody is None:
        raise ValueError("no <div class='main-content'> found in post HTML")

    return ''.join(str(child) for child in htmlBody.contents)

def addPost(postTitle, postContent):
    """Upload one post to the blog, retrying until the insert succeeds.

    Builds a ``blogger#post`` body and calls ``posts().insert`` on the shared
    ``g_service`` client. When the call raises, the function logs, sleeps 60
    seconds and tries again; on success it increments ``g_count_uploaded``
    and logs the running total.

    Args:
        postTitle: Title of the post to create.
        postContent: HTML content of the post.
    """
    global g_count_uploaded

    postBody = {
        "kind": "blogger#post",
        "id": BLOGID,
        "title": postTitle,
        "content": postContent
    }

    while True:
        try:
            # NOTE(review): the original call had a further, truncated
            # argument after ``body=``; API defaults are used here.
            postsInsertAction = g_service.posts().insert(blogId=BLOGID, body=postBody)
            postsInsertAction.execute()
            break
        except Exception as ex:
            # Every failure is treated as a rate-limit hit and retried.
            # The exception itself is logged as well so that a failure with
            # a different cause is visible instead of looping silently.
            logging.info("rateLimitExceeded, wait for 60 seconds......")
            logging.info("upload error detail: %s", ex)
            time.sleep(60)

    g_count_uploaded = g_count_uploaded + 1
    logging.info(str(g_count_uploaded) + ', post('+postTitle+') is uploaded.')

def getPostTitleList():
    """Return a dict mapping each existing post title to its post id.

    Pages through ``posts().list`` on the shared ``g_service`` client, 20
    posts per request with bodies omitted, following ``nextPageToken`` until
    the response no longer carries one. If two posts share a title, the one
    listed later wins.

    Returns:
        dict: ``{title: post_id}`` for every post returned by the API.
    """
    dictReturn = {}
    nextPageToken = None

    while True:
        postsListAction = g_service.posts().list(blogId=BLOGID, maxResults=20,
                                                 fetchBodies=False, pageToken=nextPageToken)
        posts = postsListAction.execute()

        # posts is a dict; posts['items'] is a list of dicts. Use .get() so a
        # response without 'items' (presumably an empty blog or empty page)
        # does not raise KeyError.
        for item in posts.get('items', []):
            dictReturn[item['title']] = item['id']

        # No token means this was the last page; without this exit the loop
        # would never terminate.
        nextPageToken = posts.get('nextPageToken')
        if not nextPageToken:
            break

    return dictReturn

def main():
    """Upload every not-yet-migrated post file in FILE_FOLDER to the blog.

    For each directory entry matching FILE_PATTERN, the post title is the
    file name with ``.html`` replaced by `` - GooglePlus``. Files whose title
    already exists on the blog are skipped; the rest are read as UTF-8,
    reduced to their main content by ReadFile() and uploaded via addPost().
    """
    # Titles already on the blog, so a re-run does not create duplicates.
    postTitleList = getPostTitleList()

    for fileName in os.listdir(FILE_FOLDER):
        if not fnmatch.fnmatch(fileName, FILE_PATTERN):
            logging.info('file ('+fileName+') is not valid. skip.')
            continue

        postTitle = fileName.replace('.html', ' - GooglePlus')
        if postTitle in postTitleList:
            logging.info('post ('+postTitle+') exists. skip.')
            continue

        logging.debug("file name: " + fileName)
        # os.path.join keeps working if FILE_FOLDER is configured without a
        # trailing path separator.
        with open(os.path.join(FILE_FOLDER, fileName), mode='r', encoding='UTF-8') as f:
            fileContent = f.read()

        fileContent = ReadFile(fileContentText=fileContent)

        addPost(postTitle=postTitle, postContent=fileContent)

    logging.info('completed!')

# Run the migration only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

No comments:

Post a Comment