# Helpers added to servo/lib/utils.py by commit 1b6e5ee ("Adding GSX articles",
# Filipp Lepalaan, 2016-03-21). The patch text had been collapsed onto a single
# line; the three functions it adds are laid out below as regular Python.


def unescape(s):
    """Return *s* with HTML character references replaced by their characters.

    Named (``&amp;``) and numeric (``&#39;``) references are both decoded.

    The previous implementation imported the Python 2-only ``HTMLParser``
    module and called ``HTMLParser().unescape``; that import fails on
    Python 3, and the method itself was removed in Python 3.9.
    ``html.unescape`` is used when it is available, and the legacy parser
    is kept as a fallback so Python 2 callers continue to work.

    :param s: text that may contain HTML character references.
    :returns: the decoded text.
    """
    try:
        from html import unescape as _unescape  # Python 3.4+
    except ImportError:
        # Python 2: no stdlib ``html`` module (or one without ``unescape``).
        import HTMLParser
        _unescape = HTMLParser.HTMLParser().unescape
    return _unescape(s)


def html_to_text(s, ignore_images=False):
    """Convert the HTML string *s* to text with ``html2text``.

    :param s: HTML markup to convert.
    :param ignore_images: value assigned to ``HTML2Text.ignore_images``
        before the conversion runs.
    :returns: the result of ``HTML2Text.handle(s)``.
    """
    # Imported at call time, like the parser import in ``unescape``, so this
    # block does not depend on a module-level import being present.
    import html2text

    converter = html2text.HTML2Text()
    converter.ignore_images = ignore_images
    return converter.handle(s)


def gsx_to_text(s):
    """Decode HTML character references in *s*, then convert it to text.

    Equivalent to ``html_to_text(unescape(s))`` with images kept
    (``ignore_images`` left at its default of ``False``).

    NOTE(review): presumably GSX article bodies arrive as entity-escaped
    HTML, which is why decoding happens before conversion; confirm against
    the caller.
    """
    return html_to_text(unescape(s))