diff --git a/MANIFEST.in b/MANIFEST.in index 9561fb1..70656c8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ include README.rst +prune tests diff --git a/README.rst b/README.rst index 51888ea..55ea7cf 100644 --- a/README.rst +++ b/README.rst @@ -1,8 +1,8 @@ |build| |version| |license| |downloads| -.. |build| image:: https://img.shields.io/github/workflow/status/matthewwithanm/python-markdownify/Python%20application/develop +.. |build| image:: https://img.shields.io/github/actions/workflow/status/matthewwithanm/python-markdownify/python-app.yml?branch=develop :alt: GitHub Workflow Status - :target: https://github.com/matthewwithanm/python-markdownify/actions?query=workflow%3A%22Python+application%22 + :target: https://github.com/matthewwithanm/python-markdownify/actions/workflows/python-app.yml?query=workflow%3A%22Python+application%22 .. |version| image:: https://img.shields.io/pypi/v/markdownify :alt: Pypi version @@ -87,7 +87,11 @@ strong_em_symbol sub_symbol, sup_symbol Define the chars that surround ```` and ```` text. Defaults to an empty string, because this is non-standard behavior. Could be something like - ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``. + ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``. If the value starts + with ``<`` and ends with ``>``, it is treated as an HTML tag and a ``/`` is + inserted after the ``<`` in the string used after the text; this allows + specifying ```` to use raw HTML in the output for subscripts, for + example. newline_style Defines the style of marking linebreaks (``
``) in markdown. The default @@ -123,6 +127,11 @@ escape_underscores If set to ``False``, do not escape ``_`` to ``\_`` in text. Defaults to ``True``. +escape_misc + If set to ``False``, do not escape miscellaneous punctuation characters + that sometimes have Markdown significance in text. + Defaults to ``True``. + keep_inline_images_in Images are converted to their alt-text when the images are located inside headlines or table cells. If some inline images should be converted to diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 86226d2..cd66a39 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -43,15 +43,22 @@ def abstract_inline_conversion(markup_fn): """ This abstracts all simple inline tags like b, em, del, ... Returns a function that wraps the chomped text in a pair of the string - that is returned by markup_fn. markup_fn is necessary to allow for + that is returned by markup_fn, with '/' inserted in the string used after + the text if it looks like an HTML tag. markup_fn is necessary to allow for references to self.strong_em_symbol etc. """ def implementation(self, el, text, convert_as_inline): - markup = markup_fn(self) + markup_prefix = markup_fn(self) + if markup_prefix.startswith('<') and markup_prefix.endswith('>'): + markup_suffix = '~#=+|-])', r'\\\1', text) + text = re.sub(r'([0-9])([.)])', r'\1\\\2', text) if self.options['escape_asterisks']: text = text.replace('*', r'\*') if self.options['escape_underscores']: @@ -315,7 +326,7 @@ def convert_list(self, el, text, convert_as_inline): def convert_li(self, el, text, convert_as_inline): parent = el.parent if parent is not None and parent.name == 'ol': - if parent.get("start"): + if parent.get("start") and str(parent.get("start")).isnumeric(): start = int(parent.get("start")) else: start = 1 @@ -377,13 +388,13 @@ def convert_figcaption(self, el, text, convert_as_inline): def convert_td(self, el, text, convert_as_inline): colspan = 1 - if 'colspan' in el.attrs: + if 'colspan' in el.attrs and el['colspan'].isdigit(): colspan = int(el['colspan']) return ' ' + text.strip().replace("\n", " ") + ' |' * colspan def convert_th(self, el, text, convert_as_inline): colspan = 1 - if 'colspan' in el.attrs: + if 'colspan' in el.attrs and el['colspan'].isdigit(): colspan = int(el['colspan']) return ' ' + text.strip().replace("\n", " ") + ' |' * colspan @@ -400,7 +411,7 @@ def convert_tr(self, el, text, convert_as_inline): # first row and is headline: print headline underline full_colspan = 0 for cell in cells: - if "colspan" in cell.attrs: + if 'colspan' in cell.attrs and cell['colspan'].isdigit(): full_colspan += int(cell["colspan"]) else: full_colspan += 1 diff --git a/setup.py b/setup.py index 9a26468..9a703d0 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ pkgmeta = { '__title__': 'markdownify', '__author__': 'Matthew Tretter', - '__version__': '0.12.1', + '__version__': '0.13.0', } read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read() diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 1e685f3..a35b982 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -87,6 +87,16 @@ def test_code(): assert md('*this_should_not_escape*') == '`*this_should_not_escape*`' assert md('this should\t\tnormalize') == '`this should normalize`' assert md('this should\t\tnormalize') == '`this should normalize`' + assert md('foobarbaz') == '`foobarbaz`' + assert md('foobarbaz') == '`foobarbaz`' + assert md('foo bar baz') == '`foo bar baz`' + assert md('foo bar baz') == '`foo bar baz`' + assert md('foo bar baz') == '`foo bar baz`' + assert md('foo bar baz') == '`foo bar baz`' + assert md('foo bar baz') == '`foo bar baz`' + assert md('foo bar baz') == '`foo bar baz`' + assert md('foobarbaz', sup_symbol='^') == '`foobarbaz`' + assert md('foobarbaz', sub_symbol='^') == '`foobarbaz`' def test_del(): @@ -215,6 +225,17 @@ def test_pre(): assert md('
*this_should_not_escape*
') == '\n```\n*this_should_not_escape*\n```\n' assert md('
\t\tthis  should\t\tnot  normalize
') == '\n```\n\t\tthis should\t\tnot normalize\n```\n' assert md('
\t\tthis  should\t\tnot  normalize
') == '\n```\n\t\tthis should\t\tnot normalize\n```\n' + assert md('
foo\nbar\nbaz
') == '\n```\nfoo\nbar\nbaz\n```\n' + assert md('
foo\nbar\nbaz
') == '\n```\nfoo\nbar\nbaz\n```\n' + assert md('
foo\nbar\nbaz
') == '\n```\nfoo\nbar\nbaz\n```\n' + assert md('
foo\nbaz
') == '\n```\nfoo\nbaz\n```\n' + assert md('
foo\nbar\nbaz
') == '\n```\nfoo\nbar\nbaz\n```\n' + assert md('
foo\nbar\nbaz
') == '\n```\nfoo\nbar\nbaz\n```\n' + assert md('
foo\nbar\nbaz
') == '\n```\nfoo\nbar\nbaz\n```\n' + assert md('
foo\nbar\nbaz
') == '\n```\nfoo\nbar\nbaz\n```\n' + assert md('
foo\nbar\nbaz
') == '\n```\nfoo\nbar\nbaz\n```\n' + assert md('
foo\nbar\nbaz
', sup_symbol='^') == '\n```\nfoo\nbar\nbaz\n```\n' + assert md('
foo\nbar\nbaz
', sub_symbol='^') == '\n```\nfoo\nbar\nbaz\n```\n' def test_script(): @@ -247,11 +268,13 @@ def test_strong_em_symbol(): def test_sub(): assert md('foo') == 'foo' assert md('foo', sub_symbol='~') == '~foo~' + assert md('foo', sub_symbol='') == 'foo' def test_sup(): assert md('foo') == 'foo' assert md('foo', sup_symbol='^') == '^foo^' + assert md('foo', sup_symbol='') == 'foo' def test_lang(): diff --git a/tests/test_escaping.py b/tests/test_escaping.py index 2f3a83e..eaef77d 100644 --- a/tests/test_escaping.py +++ b/tests/test_escaping.py @@ -12,7 +12,7 @@ def test_underscore(): def test_xml_entities(): - assert md('&') == '&' + assert md('&') == r'\&' def test_named_entities(): @@ -25,4 +25,23 @@ def test_hexadecimal_entities(): def test_single_escaping_entities(): - assert md('&amp;') == '&' + assert md('&amp;') == r'\&' + + +def text_misc(): + assert md('\\*') == r'\\\*' + assert md('') == r'\' + assert md('# foo') == r'\# foo' + assert md('> foo') == r'\> foo' + assert md('~~foo~~') == r'\~\~foo\~\~' + assert md('foo\n===\n') == 'foo\n\\=\\=\\=\n' + assert md('---\n') == '\\-\\-\\-\n' + assert md('+ x\n+ y\n') == '\\+ x\n\\+ y\n' + assert md('`x`') == r'\`x\`' + assert md('[text](link)') == r'\[text](link)' + assert md('1. x') == r'1\. x' + assert md('not a number. x') == r'not a number. x' + assert md('1) x') == r'1\) x' + assert md('not a number) x') == r'not a number) x' + assert md('|not table|') == r'\|not table\|' + assert md(r'\ &amp; | ` `', escape_misc=False) == r'\ & | ` `' diff --git a/tests/test_lists.py b/tests/test_lists.py index 5a04430..35eee13 100644 --- a/tests/test_lists.py +++ b/tests/test_lists.py @@ -43,6 +43,9 @@ def test_ol(): assert md('
  1. a
  2. b
') == '1. a\n2. b\n' assert md('
  1. a
  2. b
') == '3. a\n4. b\n' + assert md('
  1. a
  2. b
') == '1. a\n2. b\n' + assert md('
  1. a
  2. b
') == '1. a\n2. b\n' + assert md('
  1. a
  2. b
') == '1. a\n2. b\n' def test_nested_ols(): diff --git a/tests/test_tables.py b/tests/test_tables.py index 9120c29..594e5bf 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -215,7 +215,7 @@ Age - Jill + Jill Smith 50 @@ -226,6 +226,17 @@ """ +table_with_undefined_colspan = """ + + + + + + + + +
NameAge
JillSmith
""" + def test_table(): assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' @@ -240,3 +251,4 @@ def test_table(): assert md(table_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_caption) == 'TEXT\n\nCaption\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n' assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' diff --git a/tox.ini b/tox.ini index 9eb8750..54ba143 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ envlist = py38 [testenv] passenv = PYTHONPATH deps = - pytest + pytest==8 flake8 restructuredtext_lint Pygments