Timeout Help regarding Highrise activity counting

155 views Asked by At

I'm trying to build a simple Python script to count how many notes each user has entered in a Highrise CRM system, in the last 365 days. I have created a script that works for a tiny data set (a Highrise system with only 10 notes), but it times out on larger data sets (I assume because my script is horribly inefficient due to my lack of Python skills).

I am working on this in a Nitrous.io environment with Python 3.3, and I'm using the Highton wrapper for the Highrise API calls. (I haven't figured out how to read the API key in from a file successfully, but I can get it to work by typing the API key and username in directly. Tips here would be useful, but my main focus is getting the script to run against a production-size Highrise environment.)

Can anyone offer recommendations on how to do this more elegantly/correctly?

My Python script is:

# Using https://github.com/seibert-media/Highton to integrate with Highrise CRM
# Change to Python 3.3 with this command:  source py3env/bin/activate
# Purpose:  Count activity by Highrise CRM user in the last 365 days
from highton import Highton
from datetime import date, datetime, timedelta

#initialize Highrise instance
#keyfile = open('highrisekeys.txt', 'r')
#highrise_key = keyfile.readline()
#highrise_user = keyfile.readline()
#print('api key = ', api_key, 'user = ', api_user)

# Connect to the Highrise API (credentials redacted)
high = Highton(
    api_key = 'THIS_IS_A_SECRET',
    user = 'SECRET'
)

users = high.get_users()
people = high.get_people()

# Collect the notes attached to every person. Each person costs exactly one
# API request: the fetched result is reused rather than requested a second time.
notes = []
for person in people:
  person_notes = high.get_person_notes(person.highrise_id)
  # Only lists are accumulated; presumably the wrapper can return something
  # else when a person has no notes -- TODO confirm against Highton.
  if isinstance(person_notes, list):
    notes.extend(person_notes)

# Work out the start of the reporting window once instead of per comparison
cutoff = datetime.utcnow() - timedelta(days = 365)

# Single pass over the notes: tally recent notes per author id
activity_count = {}
for note in notes:
  if note.created_at > cutoff:
    activity_count[note.author_id] = activity_count.get(note.author_id, 0) + 1

# Report one line per user; users without recent notes show 0
for user in users:
  print(user.name, ' has performed ', activity_count.get(user.highrise_id, 0), ' activities')

The error message I got was:

Traceback (most recent call last):                                     File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 544, in urlopen                                                  

    body=body, headers=headers)                                                                                                                                                                             File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 341, in _make_request                                            

    self._validate_conn(conn)                                                                                                                                                                               File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 761, in _validate_conn                                           

    conn.connect()                                                                                                                                                                                          File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 204, in connect                                                  

    conn = self._new_conn()                                                                                                                                                                                 File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 134, in _new_conn                                                

    (self.host, self.port), self.timeout, **extra_kw)                                                                                                                                                       File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/util/connection.py", line 64, in create_connection                                         
    for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):                                                                                                                                     socket.gaierror: [Errno -2] Name or service not known           

During handling of the above exception, another exception occurred:   

                                                                                                                                                                                                          Traceback (most recent call last):                                     File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/adapters.py", line 370, in send                                                     

    timeout=timeout                                                                                                                                                                                         File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 597, in urlopen                                                  

    _stacktrace=sys.exc_info()[2])                                                                                                                                                                          File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/util/retry.py", line 245, in increment                                                

    raise six.reraise(type(error), error, _stacktrace)                                                                                                                                                      File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/packages/six.py", line 309, in reraise                                                  

    raise value.with_traceback(tb)                                                                                                                                                                          File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 544, in urlopen                                                  

    body=body, headers=headers)                                                                                                                                                                             File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 341, in _make_request                                            

    self._validate_conn(conn)                                                                                                                                                                               File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 761, in _validate_conn                                           

    conn.connect()                                                                                                                                                                                          File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 204, in connect                                                  

    conn = self._new_conn()                                                                                                                                                                                 File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 134, in _new_conn                                                

    (self.host, self.port), self.timeout, **extra_kw)                                                                                                                                                       File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/util/connection.py", line 64, in create_connection                                         
    for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):                                                                                                                                     requests.packages.urllib3.exceptions.ProtocolError: ('Connection aborted.', gaierror(-2, 'Name or service not known')) 

During handling of the above exception, another exception occurred:   

                                                                                                                                                                                                          Traceback (most recent call last):                                     File "highrise-analysis.py", line 35, in <module>                     

    tmp_notes = high.get_person_notes(person.highrise_id)                                                                                                                                                   File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 436, in get_person_notes                                         

    return self._get_notes(subject_id, 'people')                                                                                                                                                            File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 433, in _get_notes                                               

    highrise_type, subject_id)), Note)                                                                                                                                                                      File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 115, in _get_data                                                

    content = self._get_request(endpoint, params).content                                                                                                                                                   File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 44, in _get_request                                              

    params=params,                                                                                                                                                                                          File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/api.py", line 69, in get                                                       

    return request('get', url, params=params, **kwargs)                                                                                                                                                     File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/api.py", line 50, in request                                                   

    response = session.request(method=method, url=url, **kwargs)                                                                                                                                            File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/sessions.py", line 465, in request                                                  

    resp = self.send(prep, **send_kwargs)                                                                                                                                                                   File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/sessions.py", line 573, in send                                                     

    r = adapter.send(request, **kwargs)                                                                                                                                                                     File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/adapters.py", line 415, in send                                                     

    raise ConnectionError(err, request=request)                                                                                                                                                           requests.exceptions.ConnectionError: ('Connection aborted.', gaierror(-2, 'Name or service not known'))
2

There are 2 answers

1
user5056470 On

Mike,

What you are doing is going through all the users, and for each one then going through all of the notes. Once you have the user there should be a way to query for just the notes that belong to that user. You probably can include the date range in the query and just do a .count to see how many records match.

If you can't search notes by user, then go through the notes once and, in a dictionary, store each user ID together with the count of that user's notes that match your criteria. Then you can match up the user IDs with the users table.

Good luck

0
Mike K On

Problem Solved: The Highrise API is rate limited to 500 requests per 10 second period from the same IP address for the same account, which I was exceeding when extracting the data. To resolve this, I added a time.sleep(.5) command to pause between each note data-pull per person, to avoid crossing that rate limit threshold.

In addition, I broke the code into 2 separate functions: 1. Extract the users, people, and notes data and store them as local files with pickle, so I didn't need to pull the data each time I wanted to do some analysis 2. Perform analysis on the extracted pickle files

I also needed to add a try / except KeyError handler, as some notes were created by Highrise users who are no longer active (people who left the company).

Here's revised code:

# Using https://github.com/seibert-media/Highton to integrate with Highrise CRM
# Change to Python 3.3 with this command:  source py3env/bin/activate
# Purpose:  Count activity by Highrise CRM user in the last 365 days
from highton import Highton
from datetime import date, datetime, timedelta
import time
import pickle
# ===================================================================
def Create_Notes_Backup(highrise_key, highrise_user, notesfile, userfile, peoplefile, trailing_days = 365):
  """Pull users, people and notes from Highrise and pickle them to local files.

  This can take a while: one API request is made per person, followed by a
  half-second pause.

  Args:
    highrise_key: API key passed to Highton.
    highrise_user: Highrise user/account name passed to Highton.
    notesfile: Path the collected notes list is pickled to.
    userfile: Path the users list is pickled to.
    peoplefile: Path the people list is pickled to.
    trailing_days: A person's notes are kept only when at least one of them
      was created within this many days before now (UTC).
  """
  print('Entered Create_Notes_Backup function')
  high = Highton(api_key = highrise_key, user = highrise_user) # Connect to API
  print('Connected to Highrise')
  users = high.get_users()
  print('Pulled ', len(users), ' users')
  people = high.get_people()
  print('Pulled ', len(people), ' people')

  # Start of the reporting window, computed once for the whole run
  cutoff = datetime.utcnow() - timedelta(days = trailing_days)

  notes = []
  print('Started creating notes array')
  for person in people:
    person_notes = high.get_person_notes(person.highrise_id)
    time.sleep(.5) # Pause per API limits https://github.com/basecamp/highrise-api
    # Only lists are processed; presumably the wrapper can return something
    # else when a person has no notes -- TODO confirm against Highton.
    if not isinstance(person_notes, list):
      continue
    print('Pulled ', len(person_notes), ' notes for ', person.first_name, ' ', person.last_name)
    # Checking every note (not just the first) is safe for an empty list and
    # does not depend on the order in which the API returns notes.
    if any(note.created_at > cutoff for note in person_notes):
      # Reuse the notes already fetched; a second, unthrottled request per
      # person would work against the rate-limit pause above.
      notes.extend(person_notes)
  print('Finished creating notes array')

  # Final Step:  Export lists into pickle files
  with open(notesfile, 'wb') as f:
    pickle.dump(notes, f)
  with open(userfile, 'wb') as g:
    pickle.dump(users, g)
  with open(peoplefile, 'wb') as h:
    pickle.dump(people, h)
  print('Exported lists to *.bak files')

# ===================================================================

def Analyze_Notes_Backup(notesfile, userfile, peoplefile, trailing_days = 365, outfile = 'highrise-analysis-output.txt'):
  """Analyze pickled Highrise data and write a per-user activity report.

  For every user in the users backup this:
    1. Counts the notes that user authored in the last trailing_days days.
    2. Identifies the date of the most recent note that user authored.

  Results are printed and also written to outfile. Notes whose author is not
  in the users backup are reported on stdout and skipped.

  Args:
    notesfile: Path to the pickled list of notes.
    userfile: Path to the pickled list of users.
    peoplefile: Path to the pickled list of people (only its length is used).
    trailing_days: Size of the counting window, in days before now (UTC).
    outfile: Path of the text report to write.
  """
  print('Entered Analyze_Notes_Backup function')

  # Load the lists.
  # NOTE: pickle.load can execute arbitrary code; only point this function at
  # backup files you created yourself.
  with open(notesfile, 'rb') as a:
    notes = pickle.load(a)
  with open(userfile, 'rb') as b:
    users = pickle.load(b)
  with open(peoplefile, 'rb') as c:
    people = pickle.load(c)

  # Start of the counting window, computed once for the whole run
  cutoff = datetime.utcnow() - timedelta(days = trailing_days)

  # Every known user starts at zero; a user absent from last_user_update
  # has authored no notes in the backup.
  user_activity_count = {user.highrise_id: 0 for user in users}
  last_user_update = {}

  print('Started counting user activity by note')
  for note in notes:
    author = note.author_id
    if author not in user_activity_count:
      # Note written by someone who is not in the users backup
      print('User no longer exists')
      continue
    if note.created_at > cutoff:
      user_activity_count[author] += 1
    created_on = note.created_at.date()
    if author not in last_user_update or created_on > last_user_update[author]:
      last_user_update[author] = created_on
  print('Finished counting user activity by note')
  print('=======================================')

  # The with-block guarantees the report is flushed and closed
  with open(outfile, 'w') as report:
    report.write('Report run on ')
    report.write(str(date.today()))
    report.write('\n Highrise People Count: ')
    report.write(str(len(people)))
    report.write('\n ============================ \n')

    for user in users:
      count = user_activity_count[user.highrise_id]
      print(user.name, ' has performed ', count, ' activities')
      report.write(' '.join((user.name, ', ', str(count))))
      last_update = last_user_update.get(user.highrise_id)
      if last_update is None:
        print(user.name, ' has not updated Highrise in the last {} days'.format(trailing_days))
        # Written as one literal: joining a bare string would space out its characters
        report.write(', NO_UPDATES\n')
      else:
        print(user.name, ' last updated Highrise ', last_update)
        report.write(' '.join((', ', str(last_update), '\n')))

# ===================================================================
if __name__ == "__main__":
  trailing_days = 365 # Number of days back to monitor

  # Backup files shared by the extract step and the analysis step
  notes_file = 'highrise-production-notes.bak'
  users_file = 'highrise-production-users.bak'
  people_file = 'highrise-production-people.bak'

  # Production Environment Analysis
  # MY_API_KEY and MY_HIGHRISE_USERID are placeholders for the real
  # credentials; they must be defined before this script is run.
  Create_Notes_Backup(MY_API_KEY, MY_HIGHRISE_USERID, notes_file, users_file, people_file, trailing_days = trailing_days)
  Analyze_Notes_Backup(notes_file, users_file, people_file, trailing_days = trailing_days)