linkedin-posts-scraper/scrape.py

import json
import os

from linkedin_api import Linkedin


def fetch_posts(client, public_id=None, urn_id=None, post_count=10):
"""
get_profile_posts: Get profile posts
:param public_id: LinkedIn public ID for a profile
:type public_id: str, optional
:param urn_id: LinkedIn URN ID for a profile
:type urn_id: str, optional
:param post_count: Number of posts to fetch
:type post_count: int, optional
:return: List of posts
:rtype: list
"""
    url_params = {
        "count": min(post_count, client._MAX_POST_COUNT),
        "start": 0,
        "q": "memberShareFeed",
        "moduleKey": "member-shares:phone",
        "includeLongTermHistory": True,
    }
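    # Build the profile URN: use urn_id directly if given, otherwise look the
    # profile up by public_id and rewrite its "fs_miniProfile" URN into the
    # "fsd_profile" form this endpoint expects.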
    if urn_id:
        profile_urn = f"urn:li:fsd_profile:{urn_id}"
    else:
        profile = client.get_profile(public_id=public_id)
        profile_urn = profile["profile_urn"].replace(
            "fs_miniProfile", "fsd_profile"
        )
    url_params["profileUrn"] = profile_urn
    url = "/identity/profileUpdatesV2"
    res = client._fetch(url, params=url_params)
    data = res.json()
    # A non-200 "status" field in the body signals an API-level error.
    if data and "status" in data and data["status"] != 200:
        client.logger.info("request failed: {}".format(data["message"]))
        return []
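    # Page through the feed: while the response carries a non-empty
    # paginationToken, request the next chunk and append its elements,
    # stopping once post_count posts have been collected.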
    while data and data["metadata"]["paginationToken"] != "":
        print(f"got {len(data['elements'])=}")
        if len(data["elements"]) >= post_count:
            break
        pagination_token = data["metadata"]["paginationToken"]
        url_params["start"] = url_params["start"] + client._MAX_POST_COUNT
        url_params["paginationToken"] = pagination_token
        print("new request", url_params)
        res = client._fetch(url, params=url_params)
        page = res.json()
        data["metadata"] = page["metadata"]
        data["elements"] = data["elements"] + page["elements"]
        data["paging"] = page["paging"]
    return data["elements"]


# Authenticate using any LinkedIn account credentials
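# Credentials are read from the LINKEDIN_USERNAME / LINKEDIN_PASSWORD
# environment variables.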
api = Linkedin(os.getenv("LINKEDIN_USERNAME"), os.getenv("LINKEDIN_PASSWORD"))
# Example: look up a profile to obtain its URN ID
# profile = api.get_profile('jean-marc-jancovici')
# profile_id = "ACoAAAYJ-P4B8XcfHOiVTdmiAyYGfvxBRs3J_Ug"
urn_id = "ACoAAAYJ-P4B8XcfHOiVTdmiAyYGfvxBRs3J_Ug"
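# Fetch up to 800 posts; fetch_posts pages through the feed in chunks of the
# client's _MAX_POST_COUNT until that many posts are collected or the feed ends.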
posts = fetch_posts(api, urn_id=urn_id, post_count=800)
# Save the fetched posts to a JSON file.
with open("./out_posts_all_1.json", "w") as f:
    f.write(json.dumps(posts, indent=4))