Databricks: Check for Orphaned Workspace Directories

(Last Updated On: )

In this post I will show you how to check whether your Databricks workspace has orphaned directories under /Users, that is, directories whose user or service principal no longer exists.

  import requests
  from requests.auth import HTTPBasicAuth
  import json

  # The workspace URL and the access token are read from notebook widgets.
  DATABRICKS_INSTANCE = dbutils.widgets.get('url')
  TOKEN = dbutils.widgets.get('token')
  AUTH = HTTPBasicAuth('token', TOKEN)


  def find_principals(resource, attribute, value):
      """Return the SCIM entries of type *resource* whose *attribute* equals *value*.

      Args:
          resource: SCIM collection name, e.g. 'Users' or 'ServicePrincipals'.
          attribute: Attribute used in the SCIM filter, e.g. 'userName'.
          value: Value the attribute must equal.

      Returns:
          The 'Resources' list of the response, or an empty list when absent.

      Raises:
          requests.HTTPError: If the SCIM endpoint answers with an error status.
      """
      endpoint = f'{DATABRICKS_INSTANCE}/api/2.0/preview/scim/v2/{resource}'
      params = {
          'filter': f'{attribute} eq "{value}"'
      }
      response = requests.get(endpoint, params=params, auth=AUTH)
      # Fail loudly: without this, an error response (which has no 'Resources')
      # would be mistaken for a missing account.
      response.raise_for_status()
      return response.json().get('Resources', [])


  # Get list of directories
  endpoint = f'{DATABRICKS_INSTANCE}/api/2.0/workspace/list'
  params = {
      'path': '/Users/'
  }
  response = requests.get(endpoint, params=params, auth=AUTH)
  response.raise_for_status()
  json_response = response.json()

  # The listing entries are read from the 'objects' key; default to an empty
  # list so a response without that key simply produces no output.
  for value in json_response.get('objects', []):
      directory = value["path"]
      user = directory.replace("/Users/", "")

      if '@' in user:
          # Names containing '@' are looked up as users by userName.
          if not find_principals('Users', 'userName', user):
              print(f"Account {user} doesn't exist")
      else:
          # Every other name is looked up as a service principal by applicationId.
          if not find_principals('ServicePrincipals', 'applicationId', user):
              print(f"SP {user} doesn't exist")