Upload data to Sharepoint from Databricks using Python

33 views

I have a requirement where I need to upload data from Databricks into a CSV format in a SharePoint folder. I am using the Client ID and Client Secret provided by the admin, but I am getting the error below:

ValueError: Acquire app-only access token failed. HTTPSConnectionPool(host='adponline.sharepoint.com', port=443): Max retries exceeded with url: /folder/folder11 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:1129)')))

My requirement is to be able to upload the CSV file to the SharePoint folder successfully.

Code which is giving Token Error:

# Upload a small Databricks query result to a SharePoint document library as CSV.
#
# SECURITY NOTE(review): disabling certificate verification globally works
# around the "self signed certificate in certificate chain" error, but it
# removes TLS protection for EVERY https request in this process. The proper
# fix is to install the corporate root CA on the cluster (or point
# REQUESTS_CA_BUNDLE at it) instead of bypassing verification.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import pandas as pd
import io
import requests
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files import file
from office365.sharepoint.files.creation_information import FileCreationInformation
from pyspark.sql import SparkSession


# SharePoint site URL and app-only credentials.
# NOTE(review): do not hard-code the client secret in source; load it from a
# Databricks secret scope (dbutils.secrets.get) instead.
sharepoint_config = {
        'url': 'https://online.sharepoint.com/sites/Folder1',
        'client_id': 'clientid-Cliendid-cliendid',
        'client_secret': 'clientsecret',
}

# Server-relative path of the target document-library folder.
library_url = '/folderpath'

file_name = 'Test.csv'
# sqlContext is provided by the Databricks runtime; pull a small sample.
df = sqlContext.sql("""Select column1 , column2 from Table1 LIMIT 10 """)
# Convert the Spark DataFrame to pandas, then to UTF-8 encoded CSV bytes.
pandas_df = df.toPandas()
csv_data = pandas_df.to_csv(index=False).encode('utf-8')

ctx_auth = AuthenticationContext(url=sharepoint_config['url'])
print('Acquire the token...')
# acquire_token_for_app reports failure (returns False in older office365
# releases; newer ones raise) -- check it instead of continuing with an
# unauthenticated context. The original code ignored the return value.
if not ctx_auth.acquire_token_for_app(client_id=sharepoint_config['client_id'],
                                      client_secret=sharepoint_config['client_secret']):
    raise RuntimeError('Failed to acquire SharePoint app-only access token')
print('Establishing connection...')
ctx = ClientContext(sharepoint_config['url'], ctx_auth)
# execute_query() raises on failure rather than returning a falsy value, so
# the original `if response:` check could never report an error. Use
# try/except to surface upload failures.
try:
    ctx.web.get_folder_by_server_relative_url(library_url).upload_file(file_name, csv_data).execute_query()
    print("Data uploaded successfully to SharePoint.")
except Exception as exc:
    print(f"Failed to upload data to SharePoint: {exc}")

I found other forums where it was mentioned that I can use the code below to get the token and pass it to the code above. I am new to PySpark and Databricks and totally unsure how to get this done.

# Acquire an app-only Microsoft Graph access token via the MSAL
# client-credentials flow, using secrets stored in a Databricks secret scope.
import msal
import json
import logging
import requests
import ast
import pyspark.pandas as ps
import pandas as pd
from datetime import datetime, timedelta

# Azure AD tenant and authority for the confidential-client application.
tenant_id = 'Provide sample TenantID'
authority = f"https://login.microsoftonline.com/{tenant_id}"
# NOTE(review): client_id and client_credential read the SAME secret
# ("engineering"/"key"); the application (client) ID and the client secret
# should be two different secrets -- verify the scope/key names.
app = msal.ConfidentialClientApplication(
    client_id=dbutils.secrets.get(scope = "engineering", key = "key"),
    client_credential=dbutils.secrets.get(scope = "engineering", key = "key"),
    authority=authority)
scopes = ["https://graph.microsoft.com/.default"]

# Try the in-memory token cache first; fall back to a fresh
# client-credentials request against Azure AD.
result = app.acquire_token_silent(scopes, account=None)
if not result:
    print(
"No suitable token exists in cache. Let's get a new one from Azure Active Directory.")
    result = app.acquire_token_for_client(scopes=scopes)

if "access_token" in result:
    # Do not print the full bearer token -- it is a live credential; a short
    # prefix is enough to confirm acquisition worked.
    print("Access token acquired (first 10 chars): " + result["access_token"][:10] + "...")
    now = datetime.now()
    # Schedule a refresh 10 minutes before expiry. MSAL reports the token
    # lifetime in seconds via "expires_in" (typically 3600 = 1 hour). The
    # original `timedelta(hours=-7, minutes=50)` produced a refresh time
    # more than six hours in the PAST, so the token would never be refreshed
    # on time.
    refresh = now + timedelta(seconds=int(result.get("expires_in", 3600)) - 600)
    print("Refresh Time: ", refresh)
else:
    # Surface the AAD error instead of failing silently when no token came back.
    print("Token acquisition failed:", result.get("error"), result.get("error_description"))
0

There are 0 answers