diff options
author | Filipp Lepalaan <filipp@mac.com> | 2016-05-01 23:54:58 +0300 |
---|---|---|
committer | Filipp Lepalaan <filipp@mac.com> | 2016-05-01 23:54:58 +0300 |
commit | 47cc21441b437fe148c72bd181059728b47bdf9b (patch) | |
tree | cd96c0a704cb31837c6e0b2e9affe998d24acd7b /servo/lib | |
parent | 65df597329bbb602dd382695c639aab3776123ea (diff) | |
parent | 82e1fe63447f5c4d33d8a3ed7c365e7eab1006c0 (diff) | |
download | Servo-47cc21441b437fe148c72bd181059728b47bdf9b.tar.gz Servo-47cc21441b437fe148c72bd181059728b47bdf9b.tar.bz2 Servo-47cc21441b437fe148c72bd181059728b47bdf9b.zip |
Merge branch 'develop'
Diffstat (limited to 'servo/lib')
-rw-r--r-- | servo/lib/utils.py | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/servo/lib/utils.py b/servo/lib/utils.py index 2994fb3..7d9e21c 100644 --- a/servo/lib/utils.py +++ b/servo/lib/utils.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import json +import html2text import subprocess from django.http import HttpResponse from django.core.cache import cache @@ -107,3 +108,19 @@ class SessionSerializer: def loads(self, data): return json.loads(data, cls=DjangoJSONEncoder) + + +def unescape(s): + import HTMLParser + html_parser = HTMLParser.HTMLParser() + return html_parser.unescape(s) + + +def html_to_text(s, ignore_images=False): + h = html2text.HTML2Text() + h.ignore_images = ignore_images + return h.handle(s) + + +def gsx_to_text(s): + return html_to_text(unescape(s)) |