Switch to Python-Markdown

Commonmark doesn't have any built-in HTML sanitization
This commit is contained in:
Tulir Asokan 2018-11-28 15:28:34 +02:00
parent 6a6e8a818e
commit c39cacbab4
3 changed files with 23 additions and 7 deletions

View File

@ -13,11 +13,13 @@
# #
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>. # along with this program. If not, see <https://www.gnu.org/licenses/>.
from typing import Dict, List, Union, Callable, Awaitable, Optional from typing import Dict, List, Union, Callable, Awaitable, Optional, Tuple
from markdown.extensions import Extension
import markdown as md
import attr import attr
import commonmark
from mautrix import Client as MatrixClient from mautrix import Client as MatrixClient
from mautrix.util.formatter import parse_html
from mautrix.client import EventHandler from mautrix.client import EventHandler
from mautrix.types import (EventType, MessageEvent, Event, EventID, RoomID, MessageEventContent, from mautrix.types import (EventType, MessageEvent, Event, EventID, RoomID, MessageEventContent,
MessageType, TextMessageEventContent, Format, RelatesTo) MessageType, TextMessageEventContent, Format, RelatesTo)
@ -25,6 +27,20 @@ from mautrix.types import (EventType, MessageEvent, Event, EventID, RoomID, Mess
from .command_spec import ParsedCommand, CommandSpec from .command_spec import ParsedCommand, CommandSpec
class EscapeHTML(Extension):
    """Python-Markdown extension that removes the raw-HTML processors.

    When this extension is loaded, the ``"html_block"`` preprocessor and the
    ``"html"`` inline pattern are deregistered from the Markdown instance.
    NOTE(review): presumably this makes raw HTML in the input get escaped
    rather than passed through to the output -- confirm against the
    Python-Markdown documentation.
    """

    # (registry attribute on the Markdown instance, processor name to remove),
    # listed in the order they are deregistered.
    _DISABLED_PROCESSORS = (
        ("preprocessors", "html_block"),
        ("inlinePatterns", "html"),
    )

    def extendMarkdown(self, md):
        """Deregister the raw-HTML processors from the Markdown instance *md*."""
        for registry_attr, processor_name in self._DISABLED_PROCESSORS:
            getattr(md, registry_attr).deregister(processor_name)
# Shared EscapeHTML instance; parse_markdown passes it to md.markdown() as an
# extension whenever allow_html is false.
escape_html = EscapeHTML()
def parse_markdown(markdown: str, allow_html: bool = False) -> Tuple[str, str]:
    """Render Markdown source to HTML and return it with a derived text form.

    Args:
        markdown: The Markdown source text to render.
        allow_html: When false (the default), the ``escape_html`` extension is
            enabled for the render; when true, no extensions are used.

    Returns:
        A ``(text, html)`` tuple: ``html`` is the output of ``md.markdown`` and
        ``text`` is the result of running ``parse_html`` over that HTML
        (presumably a plain-text rendering -- see mautrix's formatter).
    """
    if allow_html:
        active_extensions = []
    else:
        active_extensions = [escape_html]

    rendered_html = md.markdown(markdown, extensions=active_extensions)
    plain_text = parse_html(rendered_html)
    return plain_text, rendered_html
class MaubotMessageEvent(MessageEvent): class MaubotMessageEvent(MessageEvent):
_client: MatrixClient _client: MatrixClient
@ -40,7 +56,7 @@ class MaubotMessageEvent(MessageEvent):
content = TextMessageEventContent(msgtype=MessageType.NOTICE, body=content) content = TextMessageEventContent(msgtype=MessageType.NOTICE, body=content)
if markdown: if markdown:
content.format = Format.HTML content.format = Format.HTML
content.formatted_body = commonmark.commonmark(content.body) content.body, content.formatted_body = parse_markdown(content.body)
if reply: if reply:
content.set_reply(self) content.set_reply(self)
return self._client.send_message_event(self.room_id, event_type, content) return self._client.send_message_event(self.room_id, event_type, content)
@ -65,8 +81,8 @@ class MaubotMatrixClient(MatrixClient):
def send_markdown(self, room_id: RoomID, markdown: str, msgtype: MessageType = MessageType.TEXT, def send_markdown(self, room_id: RoomID, markdown: str, msgtype: MessageType = MessageType.TEXT,
relates_to: Optional[RelatesTo] = None, **kwargs) -> Awaitable[EventID]: relates_to: Optional[RelatesTo] = None, **kwargs) -> Awaitable[EventID]:
content = TextMessageEventContent(msgtype=msgtype, body=markdown, format=Format.HTML, content = TextMessageEventContent(msgtype=msgtype, format=Format.HTML)
formatted_body=commonmark.commonmark(markdown)) content.body, content.formatted_body = parse_markdown(markdown)
if relates_to: if relates_to:
content.relates_to = relates_to content.relates_to = relates_to
return self.send_message(room_id, content, **kwargs) return self.send_message(room_id, content, **kwargs)

View File

@ -2,7 +2,7 @@ mautrix
aiohttp aiohttp
SQLAlchemy SQLAlchemy
alembic alembic
commonmark Markdown
ruamel.yaml ruamel.yaml
attrs attrs
bcrypt bcrypt

View File

@ -25,7 +25,7 @@ setuptools.setup(
"aiohttp>=3.0.1,<4", "aiohttp>=3.0.1,<4",
"SQLAlchemy>=1.2.3,<2", "SQLAlchemy>=1.2.3,<2",
"alembic>=1.0.0,<2", "alembic>=1.0.0,<2",
"commonmark>=0.8.1,<1", "Markdown>=3.0.0,<4",
"ruamel.yaml>=0.15.35,<0.16", "ruamel.yaml>=0.15.35,<0.16",
"attrs>=18.1.0,<19", "attrs>=18.1.0,<19",
"bcrypt>=3.1.4,<4", "bcrypt>=3.1.4,<4",