From 4a27fe9f16ecd4bc6f94ad89046767450316490c Mon Sep 17 00:00:00 2001 From: Simon Law Date: Mon, 23 Jul 2012 15:29:39 -0400 Subject: PDFResponse is more robust: * Now matches HttpResponse in function signature. * Modern Django content_type/mimetype handling. * Sanitizes and quotes filenames in Content-Disposition header. * Tests. --- wkhtmltopdf/tests.py | 42 +++++++++++++++++++++++++++++++++++++++++- wkhtmltopdf/utils.py | 30 ++++++++++++++++++++++++++++++ wkhtmltopdf/views.py | 16 +++++++++++----- 3 files changed, 82 insertions(+), 6 deletions(-) diff --git a/wkhtmltopdf/tests.py b/wkhtmltopdf/tests.py index 5f90c33..6b49378 100644 --- a/wkhtmltopdf/tests.py +++ b/wkhtmltopdf/tests.py @@ -10,7 +10,7 @@ from django.test import TestCase from .subprocess import CalledProcessError from .utils import _options_to_args, template_to_temp_file, wkhtmltopdf -from .views import PdfResponse, PdfTemplateView +from .views import PDFResponse, PdfResponse, PdfTemplateView class TestUtils(TestCase): @@ -69,7 +69,47 @@ class TestUtils(TestCase): class TestViews(TestCase): + def test_pdf_response(self): + """Should generate the correct HttpResponse object and mimetype""" + # 404 + response = PDFResponse(content='', status=404) + self.assertEqual(response.status_code, 404) + self.assertEqual(response.content, '') + self.assertEqual(response['Content-Type'], 'application/pdf') + self.assertFalse(response.has_header('Content-Disposition')) + + content = '%PDF-1.4\n%%EOF' + # Without filename + response = PDFResponse(content=content) + self.assertEqual(response.status_code, 200) + self.assertEqual(response.content, content) + self.assertEqual(response['Content-Type'], 'application/pdf') + self.assertFalse(response.has_header('Content-Disposition')) + + # With filename + response = PDFResponse(content=content, filename="nospace.pdf") + self.assertEqual(response['Content-Disposition'], + 'attachment; filename="nospace.pdf"') + response = PDFResponse(content=content, filename="one 
space.pdf") + self.assertEqual(response['Content-Disposition'], + 'attachment; filename="one space.pdf"') + response = PDFResponse(content=content, filename="4'5\".pdf") + self.assertEqual(response['Content-Disposition'], + 'attachment; filename="4\'5.pdf"') + response = PDFResponse(content=content, filename=u"♥.pdf") + self.assertEqual(response['Content-Disposition'], + 'attachment; filename="?.pdf"') + + # Content-Type + response = PDFResponse(content=content, + content_type='application/x-pdf') + self.assertEqual(response['Content-Type'], 'application/x-pdf') + response = PDFResponse(content=content, + mimetype='application/x-pdf') + self.assertEqual(response['Content-Type'], 'application/x-pdf') + def test_deprecated(self): + """Should warn when using deprecated views.""" with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') PdfTemplateView() diff --git a/wkhtmltopdf/utils.py b/wkhtmltopdf/utils.py index 7fe432a..fb8f0aa 100644 --- a/wkhtmltopdf/utils.py +++ b/wkhtmltopdf/utils.py @@ -81,3 +81,33 @@ def template_to_temp_file(template_name, dictionary=None, context_instance=None) f.write(smart_str(loader.render_to_string(template_name, dictionary=dictionary, context_instance=context_instance))) return tempfile_path + +def content_disposition_filename(filename): + """ + Sanitize a file name to be used in the Content-Disposition HTTP + header. + + Even if the standard is quite permissive in terms of + characters, there are a lot of edge cases that are not supported by + different browsers. + + See http://greenbytes.de/tech/tc2231/#attmultinstances for more details. + """ + filename = filename.replace(';', '').replace('"', '') + return http_quote(filename) + + +def http_quote(string): + """ + Given a unicode string, will do its dandiest to give you back a + valid ascii charset string you can use in, say, http headers and the + like. 
+ """ + if isinstance(string, unicode): + try: + import unidecode + string = unidecode.unidecode(string) + except ImportError: + string = string.encode('ascii', 'replace') + # Wrap in double-quotes for ; , and the like + return '"{!s}"'.format(string.replace('\\', '\\\\').replace('"', '\\"')) diff --git a/wkhtmltopdf/views.py b/wkhtmltopdf/views.py index 8da86f5..539a669 100644 --- a/wkhtmltopdf/views.py +++ b/wkhtmltopdf/views.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import + import os from re import compile import warnings @@ -8,23 +10,27 @@ from django.template.context import RequestContext from django.template.response import HttpResponse from django.views.generic import TemplateView -from wkhtmltopdf.utils import template_to_temp_file, wkhtmltopdf +from .utils import (content_disposition_filename, + template_to_temp_file, wkhtmltopdf) class PDFResponse(HttpResponse): - def __init__(self, content, *args, **kwargs): + def __init__(self, content, mimetype=None, status=200, + content_type='application/pdf', *args, **kwargs): filename = kwargs.pop('filename', None) - super(PDFResponse, self).__init__(content, 'application/pdf', *args, **kwargs) + super(PDFResponse, self).__init__(content, mimetype, status, + content_type, *args, **kwargs) if filename: + filename = content_disposition_filename(filename) header_content = 'attachment; filename={0}'.format(filename) - self.__setitem__('Content-Disposition', header_content) + self['Content-Disposition'] = header_content class PdfResponse(PDFResponse): def __init__(self, content, filename): warnings.warn('PdfResponse is deprecated in favour of PDFResponse. It will be removed in version 1.', PendingDeprecationWarning, 2) - super(PdfResponse, self).__init__(content, filename) + super(PdfResponse, self).__init__(content, filename=filename) class PDFTemplateView(TemplateView): -- cgit v1.2.3