Databricks: Check for Orphaned Workspace Directories

(Last Updated On: )

In this post I will show you how to check whether your Databricks workspace contains orphaned directories, i.e. directories under /Users/ whose user or service principal no longer exists.

import requests
from requests.auth import HTTPBasicAuth
import json

# Workspace URL and access token are read from the 'url' and 'token' widgets.
DATABRICKS_INSTANCE = dbutils.widgets.get('url')
TOKEN = dbutils.widgets.get('token')

# Ask the Workspace API for everything that sits directly under /Users/.
endpoint = f'{DATABRICKS_INSTANCE}/api/2.0/workspace/list'
params = {'path': '/Users/'}
response = requests.get(
    endpoint,
    params=params,
    auth=HTTPBasicAuth('token', TOKEN),
)
# Abort on any HTTP error instead of parsing an error payload below.
response.raise_for_status()
json_response = response.json()

def _scim_lookup(resource, filter_expr):
    """Return the SCIM ``Resources`` entries that match *filter_expr*.

    Args:
        resource: SCIM collection name appended to the preview SCIM v2 path,
            i.e. ``'Users'`` or ``'ServicePrincipals'``.
        filter_expr: SCIM filter expression sent as the ``filter`` parameter.

    Returns:
        The ``Resources`` list from the response, or ``[]`` when the key is
        absent (no match).

    Raises:
        requests.HTTPError: on a non-success HTTP status, so that a failed
            lookup is never mistaken for a missing identity.
    """
    response = requests.get(
        f'{DATABRICKS_INSTANCE}/api/2.0/preview/scim/v2/{resource}',
        params={'filter': filter_expr},
        auth=HTTPBasicAuth('token', TOKEN),
    )
    response.raise_for_status()
    return response.json().get('Resources', [])


# Walk the entries of the workspace listing. They are read from the "objects"
# key; a missing key (nothing under /Users/) simply yields no iterations.
for value in json_response.get('objects', []):
    directory = value["path"]
    # Strip only the leading "/Users/" so the remainder is the identity name.
    user = directory.replace("/Users/", "", 1)

    if '@' in user:
        # Names containing '@' are treated as user accounts (e-mail style).
        if not _scim_lookup('Users', 'userName eq "%s"' % user):
            print("Account %s doesn't exist" % user)
    elif not _scim_lookup('ServicePrincipals', 'applicationId eq "%s"' % user):
        # Everything else is looked up as a service principal application id.
        print("SP %s doesn't exist" % user)

Leave a Reply