What is the correct way to include subdomains in the Django sitemap URLs?

2.6k views Asked by At

I have my development site (localhost.com 'as on the development machine').

This domain has got two subdomains, developer and blog.

The URL configuration for the sitemaps is:

from django.contrib.sitemaps.views import sitemap, index as sitemap_index

# Route for the sitemap index view.
# NOTE(review): this route carries the name that the index view reverses with a
# ``section`` kwarg, yet its pattern has no ``section`` group — confirm the name
# should not sit on the per-section route below instead.
url(r'^sitemap\.xml$', sitemap_index, {'sitemaps': sitemaps},
    name='django.contrib.sitemaps.views.sitemap'),

# Route for a single section; the captured ``section`` is passed to the view.
url(r'^sitemap-(?P<section>.+)\.xml', sitemap, {'sitemaps': sitemaps}),

When creating sitemaps with a sitemap index, the sitemaps are created as:

<sitemap>
<loc>http://localhost.com/sitemap-blog.xml?p=2</loc>
</sitemap>
<sitemap>
<loc>http://localhost.com/sitemap-blog.xml?p=3</loc>
</sitemap>
<sitemap>
<loc>http://localhost.com/sitemap-blog.xml?p=4</loc>
</sitemap>

I want the sitemap on the subdomain, that is blog.example.com

So I overrode the index view from django.contrib.sitemaps.views, changing absolute_url so that it points at the blog subdomain, as follows:

from django.contrib.sitemaps.views import x_robots_tag
from django.contrib.sites.shortcuts import get_current_site
from django.core import urlresolvers
from django.template.response import TemplateResponse

@x_robots_tag
def index(request, sitemaps,
          template_name='sitemap_index.xml', content_type='application/xml',
          sitemap_url_name='django.contrib.sitemaps.views.sitemap'):
    """Render a sitemap index whose entries point at the ``blog`` subdomain.

    For every section in ``sitemaps`` the section URL is reversed from
    ``sitemap_url_name`` and prefixed with ``<protocol>://blog.<site domain>``.
    One entry is emitted for the section itself, plus one ``?p=N`` entry for
    each additional paginator page.

    :param request: current request; supplies the scheme and current site.
    :param sitemaps: mapping of section name to a sitemap instance or a
        callable returning one.
    :param template_name: template used to render the index.
    :param content_type: content type of the response.
    :param sitemap_url_name: URL name reversed with a ``section`` kwarg.
    :returns: a ``TemplateResponse`` with the URL list under ``sitemaps``.
    """
    request_scheme = request.scheme
    current_site = get_current_site(request)

    locations = []
    for section_name, section_map in sitemaps.items():
        # Entries may be given as callables instead of ready instances.
        if callable(section_map):
            section_map = section_map()

        # A protocol set on the sitemap wins over the request's scheme.
        if section_map.protocol is None:
            scheme = request_scheme
        else:
            scheme = section_map.protocol

        path = urlresolvers.reverse(
            sitemap_url_name, kwargs={'section': section_name})
        first_page = '%s://blog.%s%s' % (scheme, current_site.domain, path)
        locations.append(first_page)

        # The bare URL is page 1; later pages are addressed with ?p=N.
        last_page = section_map.paginator.num_pages
        locations.extend(
            '%s?p=%s' % (first_page, number)
            for number in range(2, last_page + 1))

    context = {'sitemaps': locations}
    return TemplateResponse(request, template_name, context,
                            content_type=content_type)

So the output of the subdomain index is something like this:

<sitemap>
<loc>http://blog.localhost.com/sitemap-whos.xml?p=3</loc>
</sitemap>
<sitemap>
<loc>http://blog.localhost.com/sitemap-whos.xml?p=4</loc>
</sitemap>

What is the correct way to make the Django sitemap framework pick up dynamic subdomains in the sitemap URLs?

I use django-subdomains package

4

There are 4 answers

0
Nasir On

For a more general version of All Іѕ Vаиітy's answer, you can use this for any subdomain you might need:

class FixedSitemap(Sitemap):
    """Sitemap whose items are themselves the hosts to be listed."""

    protocol = 'https'
    changefreq = 'monthly'
    priority = 0.5

    def items(self):
        """Return the hosts to list."""
        # Add all your items here
        hosts = ['docs.yourdomain.io']
        return hosts

    def location(self, obj):
        """Use the item itself as its location."""
        return obj

    def _urls(self, page, protocol, domain):
        """Delegate to the parent with an empty domain.

        The supplied ``domain`` is ignored; an empty string is passed instead,
        so the value returned by ``location`` fills the host position.
        """
        no_domain = ''
        return super(FixedSitemap, self)._urls(page, protocol, no_domain)
0
Anthony Manning-Franklin On

My solution extends just two classes to create a set of reusable components for sitemaps with subdomains.

First I created a new SubdomainSite class implementing the interface of django.contrib.sites.models.Site

from __future__ import unicode_literals

from django.utils.encoding import python_2_unicode_compatible


@python_2_unicode_compatible
class SubdomainSite(object):
    """
    SubdomainSite shares the interface of Site and adds subdomain support.

    ``subdomain`` is the label to prepend (e.g. ``'blog'``). ``site`` is the
    root site: an object exposing ``domain``, another ``SubdomainSite``
    (its root is reused, so subdomains never stack), or a plain domain
    string.

    After construction the instance exposes ``domain`` and ``name`` (both
    ``'<subdomain>.<root domain>'``) and ``root_site``.

    NOTE(review): ``site`` defaults to ``None``, which yields the unusable
    domain ``'<subdomain>.None'``; callers are expected to pass a site.
    """
    def __init__(self, subdomain, site=None):
        self.subdomain = subdomain
        self.extend_site(site)

    def __str__(self):
        return self.domain

    def extend_site(self, site):
        """Point this instance at the root of ``site`` and rebuild its domain.

        Always uses the root level site, so wrapping a ``SubdomainSite``
        replaces its subdomain instead of nesting under it.

        :param site: site-like object, ``SubdomainSite`` or domain string.
        :returns: ``self``, to allow chaining.
        """
        # Unwrap until we reach something that is not a SubdomainSite.
        while isinstance(site, SubdomainSite):
            site = site.root_site
        # Always remember the root, even for plain strings, so that this
        # instance can itself be wrapped later.
        self.root_site = site
        # Prefer the explicit ``domain`` attribute over ``str(site)``.
        root_domain = getattr(site, 'domain', site)
        self.domain = self.name = '{0}.{1}'.format(self.subdomain, root_domain)
        return self

    def save(self, force_insert=False, force_update=False):
        """Unsupported: a SubdomainSite is not backed by the database."""
        raise NotImplementedError('SubdomainSite cannot be saved.')

    def delete(self):
        """Unsupported: a SubdomainSite is not backed by the database."""
        raise NotImplementedError('SubdomainSite cannot be deleted.')

This is then used with a class SubdomainSitemap I created which extends Sitemap. This class only adds a subdomain attribute and adds two lines to get_urls - it's not as complicated as it looks, the original class just crams a little too much into one function.

from django.contrib.sitemaps import Sitemap


class SubdomainSitemap(Sitemap):
    """Adds subdomain support to sitemaps.

    Set ``subdomain`` to a site-like object (anything exposing ``domain``)
    to force every URL of this sitemap onto that host. When ``subdomain``
    is left as ``None`` the site supplied by the caller is used, falling
    back to the current ``Site`` of the sites framework.
    """
    subdomain = None

    def get_urls(self, page=1, site=None, protocol=None):
        """Always uses this sitemap's subdomain if supplied.

        :param page: paginator page to build URLs for.
        :param site: site-like object supplied by the caller; ignored when
            ``subdomain`` is set.
        :param protocol: fallback protocol; ``self.protocol`` wins when set,
            and ``'http'`` is used when neither is given.
        :returns: the list produced by ``_urls`` (concatenated over every
            language in ``settings.LANGUAGES`` when ``i18n`` is truthy).
        :raises ImproperlyConfigured: when no site can be determined.
        """
        # Determine protocol
        if self.protocol is not None:
            protocol = self.protocol
        if protocol is None:
            protocol = 'http'

        # Determine domain. Setting a subdomain site overrides the supplied
        # site; otherwise the supplied site must be kept (previously it was
        # overwritten with None, which crashed on ``site.domain``).
        if self.subdomain is not None:
            site = self.subdomain
        elif site is None:
            site = self._current_site_or_error()
        domain = site.domain

        if not getattr(self, 'i18n', False):
            return self._urls(page, protocol, domain)

        # Imported here so the snippet does not rely on module-level imports
        # that are not part of it.
        from django.conf import settings
        from django.utils import translation

        urls = []
        current_lang_code = translation.get_language()
        try:
            for lang_code, lang_name in settings.LANGUAGES:
                translation.activate(lang_code)
                urls += self._urls(page, protocol, domain)
        finally:
            # Restore the caller's language even if building URLs fails.
            translation.activate(current_lang_code)

        return urls

    @staticmethod
    def _current_site_or_error():
        """Return the current ``Site`` or raise ``ImproperlyConfigured``."""
        from django.apps import apps as django_apps
        from django.core.exceptions import ImproperlyConfigured

        site = None
        if django_apps.is_installed('django.contrib.sites'):
            Site = django_apps.get_model('sites.Site')
            try:
                site = Site.objects.get_current()
            except Site.DoesNotExist:
                # Reported below together with the "not installed" case.
                pass
        if site is None:
            raise ImproperlyConfigured(
                "To use sitemaps, either enable the sites framework or pass "
                "a Site/RequestSite object in your view."
            )
        return site

Now tie it all together in your sitemaps classes!

from django.contrib.sites.models import Site
from sitemaps import SubdomainSite, SubdomainSitemap
from blog.models import Post

# Resolved once, when this module is imported.
# NOTE(review): presumably requires the sites framework (and its database
# table) to be ready at import time — confirm.
current_site = Site.objects.get_current()


class BlogSitemap(SubdomainSitemap):
    """Sitemap of all ``Post`` objects, served under the ``blog`` subdomain."""
    changefreq = 'monthly'
    # Site-like object for the 'blog' subdomain of ``current_site``; built
    # once, when the class body is executed.
    subdomain = SubdomainSite('blog', current_site)
    protocol = 'https'

    def items(self):
        """Return every ``Post``."""
        return Post.objects.all()

Voila!

0
Waccess Studio On

Marty!

I've found a great solution for my needs:

  1. No need for django-subdomains; just use a simple middleware (taken from here):

    class SubdomainMiddleware:
        """Make the subdomain publicly available to classes.

        Sets ``request.subdomain`` (``None`` when there is none, or when it
        is ``www``) and ``request.domain`` (the host without the subdomain)
        on every request.

        Works both as old-style middleware (instantiated without arguments,
        ``process_request`` hook) and as new-style middleware (instantiated
        with ``get_response`` and called with the request).

        NOTE(review): any host with more than two dot-separated parts is
        treated as having a subdomain, so a bare ``example.co.uk`` yields
        subdomain ``example`` — confirm this is acceptable for your domains.
        """

        def __init__(self, get_response=None):
            self.get_response = get_response

        def __call__(self, request):
            self.process_request(request)
            return self.get_response(request)

        def process_request(self, request):
            """Annotate ``request`` with ``subdomain`` and ``domain``."""
            host = request.get_host()
            domain_parts = host.split('.')
            # A dotted IPv4 address (optionally with a port) has no subdomain;
            # without this check '127.0.0.1' would yield subdomain '127'.
            is_ipv4 = host.split(':', 1)[0].replace('.', '').isdigit()
            if len(domain_parts) > 2 and not is_ipv4:
                subdomain = domain_parts[0]
                if subdomain.lower() == 'www':
                    subdomain = None
                domain = '.'.join(domain_parts[1:])
            else:
                subdomain = None
                domain = host

            request.subdomain = subdomain
            request.domain = domain
    
  2. If you don't use a 'sitemap index', alter the sitemap view in django.contrib.sitemaps.views by adding two variables, req_domain and req_subdomain, which are now available on every request:

find

    req_protocol = request.scheme
    req_site = get_current_site(request)

add two new lines:

    req_domain = request.domain
    req_subdomain = request.subdomain

then find

    urls.extend(site.get_urls(page=page, site=req_site,
                                  protocol=req_protocol))

and make it look like this:

    urls.extend(site.get_urls(page=page, site=req_site, r_domain=req_domain, 
                                  r_subdomain=req_subdomain, protocol=req_protocol))
  3. Now alter __init__.py in the sitemaps package root directory:

In class Sitemap, make the get_urls function signature look like this: def get_urls(self, page=1, r_domain=None, r_subdomain=None, site=None, protocol=None)

Find the line domain = site.domain, comment it out and add below it:

domain = r_domain
subdomain = r_subdomain

now alter this code below:

if getattr(self, 'i18n', False):
        urls = []
        current_lang_code = translation.get_language()
        for lang_code, lang_name in settings.LANGUAGES:
            translation.activate(lang_code)
            urls += self._urls(page, protocol, domain)
        translation.activate(current_lang_code)
    else:
        urls = self._urls(page, protocol, domain)

    return urls

so it looks like this:

if getattr(self, 'i18n', False):
        urls = []
        current_lang_code = translation.get_language()
        for lang_code, lang_name in settings.LANGUAGES:
            translation.activate(lang_code)
            urls += self._urls(page, protocol, domain, subdomain)
        translation.activate(current_lang_code)
    else:
        urls = self._urls(page, protocol, domain, subdomain)

    return urls
  4. Find the def _urls(self, page, protocol, domain) function below and make its signature look like this: def _urls(self, page, protocol, domain, subdomain)

and in this function below find:

loc = "%s://%s%s" % (protocol, domain, self.__get('location', item))

and replace it with this:

loc = "%s://%s.%s%s" % (protocol, subdomain, domain, self.__get('location', item))
  5. Profit!
0
All Іѕ Vаиітy On

You can simply override _urls() method in your sitemap class and include a super call with the domain as the subdomain + host form.

class BlogSitemap(Sitemap):
    """Sitemap whose URLs are always built for one fixed host."""

    def _urls(self, page, protocol, domain):
        """Ignore the supplied ``domain`` and delegate with the fixed host."""
        fixed_host = 'docs.djangoproject.com'
        parent = super(BlogSitemap, self)
        return parent._urls(page=page, protocol=protocol, domain=fixed_host)