Using imp.load_source to dynamically load python modules AND packages

4.4k views Asked by At

I'm trying to dynamically load modules and packages from arbitrary folder locations in python 2.7. It works great with bare, single file modules. But trying to load in a package is a bit harder.

The best I could figure out was to load the __init__.py file inside the package (folder). But say, for example, I have this:

root:
  mod.py
  package:
    __init__.py
    sub.py

If mod.py contains:

from package import sub

Using my current loading code (below), it will fail stating that there is no package named "sub", unless I add the following to package/__init__.py

import sub

I have to imagine that this is because when you import a package it would normally also scan for all the other sub files in it. Do I also just need to do that manually, or is there a method similar to imp.load_source that will also handle package folders?

Loading code:

import md5
import sys
import os.path
import imp
import traceback
import glob

def load_package(path, base):
    """Load the package folder ``path/base`` by executing its ``__init__.py``.

    The package folder is appended to ``sys.path`` (even when it turns out not
    to be a package), and the module built from ``__init__.py`` is given a
    ``__path__`` so that later ``from <base> import <submodule>`` statements
    can locate the package's submodules.

    path -- folder that contains the package folder
    base -- name of the package folder, also used as the module name

    Returns ``(base, module)``, or ``None`` when ``path/base`` has no
    ``__init__.py``.  Any exception is printed to stderr and re-raised.
    """
    try:
        pkg_dir = path + "/" + base
        sys.path.append(pkg_dir)
        init = pkg_dir + "/__init__.py"
        if not os.path.exists(init):
            return None

        # ``with`` closes the file on every exit path and never touches an
        # unbound name when open() itself fails.
        with open(init, 'rb') as fin:
            package = imp.load_source(base, init, fin)

        # imp.load_source() builds a plain module: without __path__ the import
        # system does not treat it as a package and cannot find submodules.
        # NOTE: __path__ is only set after __init__.py has finished running.
        package.__path__ = [pkg_dir]
        return (base, package)
    except BaseException:
        # Log and re-raise; nothing is swallowed here.
        traceback.print_exc(file=sys.stderr)
        raise

def load_module(path):
    """Load the single-file module at *path* with ``imp.load_source``.

    The module name is the file name with its extension removed.

    path -- path of the ``.py`` file to load

    Returns ``(name, module)``.  Any exception (missing file, error raised
    while executing the module, ...) is printed to stderr and re-raised.
    """
    try:
        # splitext() drops only the trailing extension; a blanket
        # replace(".py", "") would also eat ".py" inside the file name.
        base = os.path.splitext(os.path.basename(path))[0]
        with open(path, 'rb') as fin:
            return (base, imp.load_source(base, path, fin))
    except BaseException:
        # Log and re-raise; nothing is swallowed here.
        traceback.print_exc(file=sys.stderr)
        raise

def load_folder(dir):
    """Load every package folder and top-level ``.py`` file found in *dir*.

    *dir* is appended to ``sys.path``.  Sub-folders are handed to
    ``load_package`` first, then each ``*.py`` file is handed to
    ``load_module``.

    dir -- folder to scan (not recursive)

    Returns a dict mapping each loaded name to its module object.
    """
    sys.path.append(dir)
    mods = {}

    for p in glob.glob(dir + "/*/"):
        # Each match ends with a separator, so dirname() drops it and
        # basename() yields the folder name.  Deleting separators and then
        # removing the collapsed folder path corrupts names that happen to
        # contain it (folder "a" + package "data" used to become "dt").
        base = os.path.basename(os.path.dirname(p))
        package = load_package(dir, base)
        if package:
            name, pack = package
            mods[name] = pack

    for m in glob.glob(dir + "/*.py"):
        name, mod = load_module(m)
        mods[name] = mod

    return mods
1

There are 1 answers

1
Mr_and_Mrs_D On

The code below is functionally equivalent to your code modulo the traceback.print_exc (which you should let the client handle - if not handled the exception will end up printed anyway):

def _load_package(path, base):
    sys.path.append(path + "/" + base)
    init = path + "/" + base + "/__init__.py"
    if not os.path.exists(init):
        return None, None
    with open(init, 'rb') as fin:
        return base, imp.load_source(base, init, fin)

def _load_module(path):
    code_file = os.path.basename(path)
    base = code_file.replace(".py", "")
    with open(path, 'rb') as fin:
        return base, imp.load_source(base, path, fin)

def load_folder(dir):
    """Load the sub-folders and ``*.py`` files of *dir*; return name -> module.

    *dir* is appended to ``sys.path``.  Sub-folders go through
    ``_load_package`` (those without an ``__init__.py`` are skipped), then
    every ``*.py`` file goes through ``_load_module``.
    """
    sys.path.append(dir)
    loaded = {}
    package_dirs = glob.glob(dir + "/*/")
    # Separator-free spelling of the folder, trimmed off each match below.
    flat_dir = dir.replace("\\", "").replace("/", "")
    for match in package_dirs:
        flat_match = match.replace("\\", "").replace("/", "")
        key, package = _load_package(dir, flat_match.replace(flat_dir, ""))
        if key:
            loaded[key] = package
    for source_path in glob.glob(dir + "/*.py"): ##: /*/*.py
        key, module = _load_module(source_path)
        loaded[key] = module
    return loaded

## My added code
# Show which interpreter and platform produced the output quoted below.
print('Python %s on %s' % (sys.version, sys.platform))

# Folder whose modules and packages are loaded; machine-specific path.
root_ = r'C:\Dropbox\eclipse_workspaces\python\sandbox\root'

def depyc(root, _indent=''): # deletes .pyc which will end up being imported
    """Print an indented listing of *root* and delete every ``.pyc`` in it.

    Sub-folders are listed recursively, indented two more spaces per level.
    Deleted ``.pyc`` files are not listed.

    root    -- folder to list and clean
    _indent -- prefix for the current nesting level; internal, leave empty
    """
    top_level = not _indent
    if top_level:
        print('\nListing %s' % (root,))
    for p in os.listdir(root):
        name = _indent + p
        abspath = os.path.join(root, p)
        if os.path.isdir(abspath):
            print(name + ':')
            depyc(abspath, _indent=_indent + '  ')
        elif name.endswith('.pyc'):
            os.remove(abspath)
        else:
            print(name)
    if top_level:
        # Blank line closing the listing.
        print('')

# List the tree (removing any .pyc files), then load everything under root_.
depyc(root_)
load_folder(root_)

Prints:

Python 2.7.10 (default, May 23 2015, 09:40:32) [MSC v.1500 32 bit (Intel)] on win32

Listing C:\Dropbox\eclipse_workspaces\python\sandbox\root
mod.py
package:
  sub.py
  __init__.py

C:\Dropbox\eclipse_workspaces\python\sandbox\root/package/__init__.py imported!
C:\Dropbox\eclipse_workspaces\python\sandbox\root\mod.py imported!

mod.py, sub.py and __init__.py just contain

print(__file__ + u' imported!')

Now modifying mod.py to:

from package import sub
print(__file__ + u' imported!')

we get indeed:

Listing....

C:\Dropbox\eclipse_workspaces\python\sandbox\root/package/__init__.py imported! <### this may move around ###>
Traceback (most recent call last):
  File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 57, in <module>
    load_folder(root_)
  File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 31, in load_folder
    hash, mod = _load_module(m)
  File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 20, in _load_module
    return base, imp.load_source(base, path, fin)
  File "C:\Dropbox\eclipse_workspaces\python\sandbox\root\mod.py", line 1, in <module>
    from package import sub
ImportError: cannot import name sub

Note the error is "cannot import name sub" and not "there is no package named "sub"". So why can't it ?

Modifying __init__.py:

# package/__init__.py    
print(__file__ + u' imported!')

print '__name__', '->', __name__
print '__package__', '->', __package__
print '__path__', '->', __path__

prints:

Listing...

C:\Dropbox\eclipse_workspaces\python\sandbox\root/package/__init__.py imported! <### not really ###>
__name__ -> package
__package__ -> None
__path__ ->
Traceback (most recent call last):
  File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 59, in <module>
    load_folder(root_)
  File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 30, in load_folder
    hash, pack = _load_package(dir, base)
  File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 14, in _load_package
    init = imp.load_source(base, init, fin)
  File "C:\Dropbox\eclipse_workspaces\python\sandbox\root/package/__init__.py", line 5, in <module>
    print '__path__', '->', __path__
NameError: name '__path__' is not defined

While directly importing it would print:

>>> sys.path.extend([r'C:\Dropbox\eclipse_workspaces\python\sandbox\root'])
>>> import package
C:\Dropbox\eclipse_workspaces\python\sandbox\root\package\__init__.py imported!
__name__ -> package
__package__ -> None
__path__ -> ['C:\\Dropbox\\eclipse_workspaces\\python\\sandbox\\root\\package']

So modify _load_package to:

def _load_package(path, base):
    pkgDir = os.path.abspath(os.path.join(path, base))
    init = os.path.join(pkgDir, "__init__.py")
    if not os.path.exists(init):
        return None, None
    file, pathname, description = imp.find_module(base, [path])
    print file, pathname, description # None, pkgDir, ('', '', 5)
    pack = sys.modules.get(base, None) # load_module will reload - yak!
    if pack is None:
        sys.modules[base] = pack = imp.load_module(base, file, pathname, description)
    return base, pack

Solves it as would:

...
    if pack is None:
        sys.modules[base] = pack = imp.load_module(base, None, '', description)
        pack.__path__ = [pkgDir]

or in your original code:

with open(init, 'rb') as fin:
    source = imp.load_source(base, init, fin)
    source.__path__ = path + "/" + base
    return base, source

So what's going on is that a package relies on its __path__ attribute to function correctly.


Kept hacking on that and came up with:

import sys
import os.path
import imp

def _load_(root, name):
    file_object, pathname, description = imp.find_module(name, [root])
    pack = sys.modules.get(name, None)
    try:
        if pack is None:
            pack = imp.load_module(name, file_object, pathname, description)
        else:
            print 'In cache', pack
    finally:
        if file_object is not None: file_object.close()
    return name, pack

def load_folder(root):
    """Import every package and ``.py`` module found directly in *root*.

    Packages (entries containing an ``__init__.py``) are imported first,
    then the ``.py`` files.  Each import is announced on stdout.

    root -- folder to scan (not recursive)

    Returns a dict mapping each imported name to its module object.
    """
    # sys.path is deliberately left alone; root is handed to _load_ instead.
    mods = {}
    paths = [(item, os.path.join(root, item)) for item in os.listdir(root)]
    packages = [(name, abspath) for name, abspath in paths
                if os.path.exists(os.path.join(abspath, "__init__.py"))]
    # Regular files only: a directory named "something.py" is not a module.
    py_files = [(name, abspath) for name, abspath in paths
                if name.endswith('.py') and os.path.isfile(abspath)]
    # first import packages as in original - modules may import from them
    for name, abspath in packages:
        print('Importing %s' % (abspath,))
        key, mod = _load_(root, name=name) # will use pyc if available!
        mods[key] = mod
    # then modules
    for name, abspath in py_files:
        print('Importing %s' % (abspath,))
        key, mod = _load_(root, name=name[:-3])
        mods[key] = mod
    return mods

I merged the package and module loading code, dropping imp.load_source (one less tricky function) and relying on imp.load_module instead. I do not touch sys.path directly, and since imp.load_module will reload an already imported module [!], I check the sys.modules cache first. The returned mods dict is completely untested - you still have to implement a hash for its keys somehow (the _abspath should suffice).

Run as:

def depyc(root, rmpyc, _indent=''):
    """Print an indented listing of *root*, optionally deleting ``.pyc`` files.

    Sub-folders are listed recursively, indented two more spaces per level.

    root    -- folder to list
    rmpyc   -- when true, ``.pyc`` files are deleted instead of listed
    _indent -- prefix for the current nesting level; internal, leave empty
    """
    top_level = not _indent
    if top_level:
        print('\nListing %s' % (root,))
    for p in os.listdir(root):
        name = _indent + p
        abspath = os.path.join(root, p)
        if os.path.isdir(abspath):
            print(name + ':')
            depyc(abspath, rmpyc, _indent=_indent + '  ')
        elif rmpyc and name.endswith('.pyc'):
            os.remove(abspath)
        else:
            print(name)
    if top_level:
        # Blank line closing the listing.
        print('')

## Run ##
# Show which interpreter and platform produced the output.
print('Python %s on %s' % (sys.version, sys.platform))
# The example tree is expected in a "root" folder under the current directory.
root_ = os.path.join(os.getcwdu(), u'root')
# Pass True instead to delete the .pyc files before loading.
depyc(root_, False) # False will end up importing the pyc files !
load_folder(root_)

to test various scenarios -

The code with an example root/ dir is here