Downloading files with unicode characters from BaseHTTPServer in Python

1.4k views Asked by At

I'm using Python 2.7.8 to make an server from where I can download files. Problem is that many files include utf-8 characters such as čćžšđ and others. I tried decoding path but whenever I click on file name with unicode character it returns "error 404: file not found". How do I properly decode paths so it makes possible to download files with utf-8 characters and, if possible, show them as utf-8 character on index of my server. Here's my server's code, which includes what have I tried and full server code:

# -*- coding: utf-8 -*-

__version__ = "0.6"

__all__ = ["SimpleHTTPRequestHandler"]

import os
import posixpath
import BaseHTTPServer
import urllib
import cgi
import shutil
import mimetypes
from StringIO import StringIO
import SocketServer
import time
import sys
import unicodedata



class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):

    server_version = "SimpleHTTP/" + __version__

    def do_GET(self):
        """Serve a GET request."""
        f = self.send_head()
        if f:
        self.copyfile(f, self.wfile)
        f.close()

    def do_HEAD(self):
        """Serve a HEAD request."""
        f = self.send_head()
        if f:
            f.close()

    def send_head(self):
        path_now = self.translate_path(self.path)
        path_change = (os.path.dirname(os.path.abspath(__file__)) + "/files/")
        if path_now.startswith("/home/files/"):
           pass
        else:
           os.chdir(path_change)
        path = self.translate_path(self.path)
        """Those are few examples of what have I tried:
        path = path1.decode('ascii', 'ignore').makePath()
        path = unicodedata.normalize('NFKD', path1).encode('ascii','ignore')
        path2 = path1.decode("utf-8")
        path = path2.encode("utf-8")
        path = path2.encode("utf-8")"""
        f = None
        if os.path.isdir(path):
            for index in "index.html", "index.htm":
                index = os.path.join(path, index)
                if os.path.exists(index):
                    path = index
                    break
            else:
                return self.list_directory(path)
        ctype = self.guess_type(path)
        if ctype.startswith('text/'):
            mode = 'r'
        else:
            mode = 'rb'
        try:
            f = open(path.decode(sys.getfilesystemencoding()), mode) #this doesn't work, nothing changes
            size = os.path.getsize(path)
        except IOError:
            self.send_error(404, "File not found")
            return None
        self.send_response(200)
        self.send_header("Content-type", ctype + "; charset=utf-8") # + " charset=utf-8"
        self.send_header("Content-Length", size)
        self.end_headers()
        return f

    def list_directory(self, path):
        try:
            list = os.listdir(path)
        except os.error:
            self.send_error(404, "No permission to list directory")
            return None
        list.sort(lambda a, b: cmp(a.lower(), b.lower()))
        f = StringIO()
        f.write("<title>Directory listing for %s</title>\n" % self.path)
        f.write("<h2>Directory listing for %s</h2>\n" % self.path)
        f.write("<hr>\n<ul>\n")
        for name in list:
            fullname = os.path.join(path, name)
            displayname = linkname = name = cgi.escape(name)
            if os.path.isdir(fullname):
                displayname = name + "/"
                linkname = name + "/"
            if os.path.islink(fullname):
                displayname = name + "@"
            f.write('<li><a href="%s">%s</a>\n' % (linkname, displayname))
        f.write("</ul>\n<hr>\n")
        f.seek(0)
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        return f

    def translate_path(self, path):
        try:
            path = posixpath.normpath(urllib.unquote(path))
            words = path.split('/')
            words = filter(None, words)
            path = os.getcwd()
            for word in words:
                drive, word = os.path.splitdrive(word)
                head, word = os.path.split(word)
                if word in (os.curdir, os.pardir): continue
                path = os.path.join(path, word)
            return path
        except Exception, e:
           self.send_error(403, e)
           path = posixpath.normpath(urllib.unquote(path))
           words = path.split('/')
           words = filter(None, words)
           path = os.getcwd()
           for word in words:
               drive, word = os.path.splitdrive(word)
               head, word = os.path.split(word)
               if word in (os.curdir, os.pardir): continue
               path = os.path.join(path, word)
           return path.encode("utf-8")

    def copyfile(self, source, outputfile):
        shutil.copyfileobj(source, outputfile)

    def guess_type(self, path):
        base, ext = posixpath.splitext(path)
        if self.extensions_map.has_key(ext):
            return self.extensions_map[ext]
        ext = ext.lower()
        if self.extensions_map.has_key(ext):
            return self.extensions_map[ext]
        else:
            return self.extensions_map['']

    extensions_map = mimetypes.types_map.copy()
    extensions_map.update({
        '': 'application/octet-stream', # Default
        '.py': 'text/plain',
        '.c': 'text/plain',
        '.h': 'text/plain',
        })

class ForkingHTTPServer(SocketServer.ForkingMixIn, BaseHTTPServer.HTTPServer):
    def finish_request(self, request, client_address):
        request.settimeout(30)
        BaseHTTPServer.HTTPServer.finish_request(self, request, client_address)


def test(HandlerClass = SimpleHTTPRequestHandler, ServerClass = BaseHTTPServer.HTTPServer, server_address=("192.168.1.2", 8000)):
    try:
        print "Server started"
        srvr = ForkingHTTPServer(server_address, HandlerClass)
        srvr.serve_forever()  # serve_forever
    except KeyboardInterrupt:
        print "Closing sockets..."
        time.sleep(2)
        print "Server is shutting down in 3"
        time.sleep(1)
        print "Server is shutting down in 2"
        time.sleep(1)
        print "Server is shutting down in 1"
        time.sleep(1)
        srvr.socket.close()


if __name__ == '__main__':
        test()

I hope this is all information you need. If you need anything else, just comment and I'll be glad to edit my question ;)

1

There are 1 answers

0
Amar Kalabić On BEST ANSWER

I just saw that I was setting charset to utf-8 inside "send_head" function, but not in "list_directory" function. Also, I edited

"; charset=utf-8" 

to

'; charset="utf-8"' 

and it works like a charm now.

It looks like that I was encoding/decoding righ thing all the time but wasn't setting my headers right.