diff options
author | Filipp Lepalaan <filipp@mac.com> | 2016-03-21 19:14:37 +0200 |
---|---|---|
committer | Filipp Lepalaan <filipp@mac.com> | 2016-03-21 19:14:37 +0200 |
commit | 1b6e5ee722f76f594e3d95dfa8d77ac2b9f2173a (patch) | |
tree | 61dd9498cac2b20501bf96f54a95a89d714d4449 /servo/lib | |
parent | f30d6d73513a5535cae4d97ee2cc85f01660750c (diff) | |
download | Servo-1b6e5ee722f76f594e3d95dfa8d77ac2b9f2173a.tar.gz Servo-1b6e5ee722f76f594e3d95dfa8d77ac2b9f2173a.tar.bz2 Servo-1b6e5ee722f76f594e3d95dfa8d77ac2b9f2173a.zip |
Adding GSX articles
Diffstat (limited to 'servo/lib')
-rw-r--r-- | servo/lib/utils.py | 17 |
1 files changed, 17 insertions, 0 deletions
```diff
diff --git a/servo/lib/utils.py b/servo/lib/utils.py
index 2994fb3..7d9e21c 100644
--- a/servo/lib/utils.py
+++ b/servo/lib/utils.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 import json
+import html2text
 import subprocess
 from django.http import HttpResponse
 from django.core.cache import cache
@@ -107,3 +108,19 @@ class SessionSerializer:
 
     def loads(self, data):
         return json.loads(data, cls=DjangoJSONEncoder)
+
+
+def unescape(s):
+    import HTMLParser
+    html_parser = HTMLParser.HTMLParser()
+    return html_parser.unescape(s)
+
+
+def html_to_text(s, ignore_images=False):
+    h = html2text.HTML2Text()
+    h.ignore_images = ignore_images
+    return h.handle(s)
+
+
+def gsx_to_text(s):
+    return html_to_text(unescape(s))
```