From c39cacbab491f0f95bfed17ad631aaeb4d2cbcc2 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Wed, 28 Nov 2018 15:28:34 +0200 Subject: [PATCH] Switch to Python-Markdown Commonmark doesn't have any built-in HTML sanitization --- maubot/matrix.py | 26 +++++++++++++++++++++----- requirements.txt | 2 +- setup.py | 2 +- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/maubot/matrix.py b/maubot/matrix.py index 7523369..c152af9 100644 --- a/maubot/matrix.py +++ b/maubot/matrix.py @@ -13,11 +13,13 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. -from typing import Dict, List, Union, Callable, Awaitable, Optional +from typing import Dict, List, Union, Callable, Awaitable, Optional, Tuple +from markdown.extensions import Extension +import markdown as md import attr -import commonmark from mautrix import Client as MatrixClient +from mautrix.util.formatter import parse_html from mautrix.client import EventHandler from mautrix.types import (EventType, MessageEvent, Event, EventID, RoomID, MessageEventContent, MessageType, TextMessageEventContent, Format, RelatesTo) @@ -25,6 +27,20 @@ from mautrix.types import (EventType, MessageEvent, Event, EventID, RoomID, Mess from .command_spec import ParsedCommand, CommandSpec +class EscapeHTML(Extension): + def extendMarkdown(self, md): + md.preprocessors.deregister("html_block") + md.inlinePatterns.deregister("html") + + +escape_html = EscapeHTML() + + +def parse_markdown(markdown: str, allow_html: bool = False) -> Tuple[str, str]: + html = md.markdown(markdown, extensions=[escape_html] if not allow_html else []) + return parse_html(html), html + + class MaubotMessageEvent(MessageEvent): _client: MatrixClient @@ -40,7 +56,7 @@ class MaubotMessageEvent(MessageEvent): content = TextMessageEventContent(msgtype=MessageType.NOTICE, body=content) if markdown: content.format = Format.HTML - content.formatted_body = commonmark.commonmark(content.body) + 
content.body, content.formatted_body = parse_markdown(content.body) if reply: content.set_reply(self) return self._client.send_message_event(self.room_id, event_type, content) @@ -65,8 +81,8 @@ class MaubotMatrixClient(MatrixClient): def send_markdown(self, room_id: RoomID, markdown: str, msgtype: MessageType = MessageType.TEXT, relates_to: Optional[RelatesTo] = None, **kwargs) -> Awaitable[EventID]: - content = TextMessageEventContent(msgtype=msgtype, body=markdown, format=Format.HTML, - formatted_body=commonmark.commonmark(markdown)) + content = TextMessageEventContent(msgtype=msgtype, format=Format.HTML) + content.body, content.formatted_body = parse_markdown(markdown) if relates_to: content.relates_to = relates_to return self.send_message(room_id, content, **kwargs) diff --git a/requirements.txt b/requirements.txt index 2a918f9..ed45a6d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ mautrix aiohttp SQLAlchemy alembic -commonmark +Markdown ruamel.yaml attrs bcrypt diff --git a/setup.py b/setup.py index 2995b45..7f92fa0 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ setuptools.setup( "aiohttp>=3.0.1,<4", "SQLAlchemy>=1.2.3,<2", "alembic>=1.0.0,<2", - "commonmark>=0.8.1,<1", + "Markdown>=3.0.0,<4", "ruamel.yaml>=0.15.35,<0.16", "attrs>=18.1.0,<19", "bcrypt>=3.1.4,<4",