Python源码示例:pypandoc.convert_text()

示例1
def _convert_md_table_to_rst(table):
    """Convert a markdown table to rst format"""
    if len(table) < 3:
        return ''
    out = '```eval_rst\n.. list-table::\n   :header-rows: 1\n\n'
    for i,l in enumerate(table):
        cols = l.split('|')[1:-1]
        if i == 0:
            ncol = len(cols)
        else:
            if len(cols) != ncol:
                return ''
        if i == 1:
            for c in cols:
                if len(c) is not 0 and '---' not in c:
                    return ''
        else:
            for j,c in enumerate(cols):
                out += '   * - ' if j == 0 else '     - '
                out += pypandoc.convert_text(
                    c, 'rst', format='md').replace('\n', ' ').replace('\r', '') + '\n'
    out += '```\n'
    return out 
示例2
def create(self, variables, md_output, pdf_output):
        """Render the report template to markdown and, best effort, to PDF.

        The template named by ``self.template`` is loaded from the
        ``reporting/templates`` directory of the ``qanta`` package and
        rendered with ``variables``.

        Parameters
        ----------
        variables
            Passed to the template's ``render`` call.
        md_output
            Path the rendered markdown is written to; skipped when ``None``.
        pdf_output
            Path handed to pypandoc as ``outputfile`` for the PDF.

        Any exception raised while importing or running pypandoc is logged
        as a warning and swallowed, so a missing pandoc only omits the PDF.
        """
        env = Environment(loader=PackageLoader('qanta', 'reporting/templates'))
        markdown = env.get_template(self.template).render(variables)
        if md_output is not None:
            with open(md_output, 'w') as f:
                f.write(markdown)
        try:
            import pypandoc
            pypandoc.convert_text(
                markdown,
                'pdf',
                format='md',
                outputfile=pdf_output,
                extra_args=['-V', 'geometry:margin=.75in']
            )
        except Exception as e:
            # Deliberately broad: the PDF is optional output.
            # ``warning`` replaces the deprecated ``warn`` alias.
            log.warning('Pandoc was not installed or there was an error calling it, omitting PDF report')
            log.warning(str(e))
示例3
def convert_issue_data(self, redmine_issue):
        """
        Build the payload for a new GitHub issue from a Redmine issue.

        The Redmine description is passed through ``convert_text`` (from
        ``textile`` to ``markdown_github``) and prefixed with a note naming
        the Redmine id, the creation date and, for closed issues, the
        closing date. Dates are the part of the timestamp before ``T``.

        Returns a dict with ``title``, ``body`` and ``assignees`` keys.
        """
        description_md = convert_text(
            redmine_issue['description'], 'markdown_github', 'textile'
        )

        issue_id = redmine_issue['id']
        created_day = redmine_issue['created_on'].split('T')[0]
        porting_note = '###### ported from Redmine #%s (created %s)' % (
            issue_id, created_day)

        if self.is_closed(redmine_issue):
            closed_day = redmine_issue['closed_on'].split('T')[0]
            porting_note = '%s (CLOSED %s)' % (porting_note, closed_day)

        return {
            "title": "%(subject)s (RM#%(id)s)" % redmine_issue,
            "body": "%s\n\n%s" % (porting_note, description_md),
            "assignees": ["adam-iris"],
        }
示例4
def fill_notebook(work_notebook, script_blocks, gallery_conf):
    """Add one notebook cell per script block.

    Blocks labelled ``'code'`` become code cells. Every other block is
    converted from rst to markdown and added as a markdown cell, using
    ``rst2md`` when ``gallery_conf["pypandoc"]`` is ``False`` and pypandoc
    otherwise.

    Parameters
    ----------
    work_notebook
        Notebook object passed on to ``add_code_cell`` / ``add_markdown_cell``.
    script_blocks : list
        Each list element should be a tuple of (label, content, lineno).
    gallery_conf : dict
        Its ``"pypandoc"`` entry is either ``False`` or a mapping of extra
        keyword arguments forwarded to ``pypandoc.convert_text``.
    """
    for label, content, _lineno in script_blocks:
        if label == 'code':
            add_code_cell(work_notebook, content)
            continue

        pandoc_kwargs = gallery_conf["pypandoc"]
        if pandoc_kwargs is False:
            text = rst2md(content + '\n')
        else:
            import pypandoc
            # pandoc appends the trailing newline itself
            text = pypandoc.convert_text(
                content, to='md', format='rst', **pandoc_kwargs
            )
        add_markdown_cell(work_notebook, text)
示例5
def _convert_md_table_to_rst(table):
    """Convert a markdown table to rst format"""
    if len(table) < 3:
        return ''
    out = '```eval_rst\n.. list-table::\n   :header-rows: 1\n\n'
    for i,l in enumerate(table):
        cols = l.split('|')[1:-1]
        if i == 0:
            ncol = len(cols)
        else:
            if len(cols) != ncol:
                return ''
        if i == 1:
            for c in cols:
                if len(c) is not 0 and '---' not in c:
                    return ''
        else:
            for j,c in enumerate(cols):
                out += '   * - ' if j == 0 else '     - '
                out += pypandoc.convert_text(
                    c, 'rst', format='md').replace('\n', ' ').replace('\r', '') + '\n'
    out += '```\n'
    return out 
示例6
def rst_to_notebook(infile, outfile, diridx=False):
    """Convert an rst file to a notebook file.

    The rst text is converted to markdown by pandoc, its ``.py`` links are
    retargeted to ``.ipynb``, and the result is wrapped in a triple-quoted
    string that ``py2jn`` turns into a notebook.

    Parameters
    ----------
    infile
        Path of the rst file to read.
    outfile
        Path passed to ``py2jn.tools.write_notebook``.
    diridx : bool
        When true, links whose target contains no ``/`` get ``/index.ipynb``
        appended.
    """

    # Read infile into a string
    with open(infile, 'r') as fin:
        rststr = fin.read()
    # Convert string from rst to markdown
    mdfmt = 'markdown_github+tex_math_dollars+fenced_code_attributes'
    mdstr = pypandoc.convert_text(rststr, mdfmt, format='rst',
                                  extra_args=['--atx-headers'])
    # In links, replace .py extensions with .ipynb. The dot is escaped so
    # only a literal ".py" suffix matches (unescaped, "(xspy)" matched too).
    mdstr = re.sub(r'\(([^\)]+)\.py\)', r'(\1.ipynb)', mdstr)
    # Links to subdirectories require explicit index file inclusion
    # NOTE(review): this also matches slash-free file links such as the
    # ".ipynb" targets produced above -- confirm inputs only link to dirs.
    if diridx:
        mdstr = re.sub(r']\(([^\)/]+)\)', r'](\1/index.ipynb)', mdstr)
    # Enclose the markdown within triple quotes and convert from
    # python to notebook
    mdstr = '"""' + mdstr + '"""'
    nb = py2jn.py_string_to_notebook(mdstr)
    py2jn.tools.write_notebook(nb, outfile, nbver=4)
示例7
def pandoc_process(app, what, name, obj, options, lines):
    """Convert a docstring's lines to reStructuredText in place using pandoc.

    ``app.config.mkdsupport_use_parser`` names the pandoc input format.
    ``lines`` is joined with ``SEP``, converted, split on ``SEP`` again and
    written back into the same list object. Nothing is done when ``lines``
    is empty. ``what``, ``name``, ``obj`` and ``options`` are not used.
    Always returns ``None``.
    """
    if not lines:
        return None

    source_format = app.config.mkdsupport_use_parser

    # No encoding work is needed: sphinx.ext.autodoc hands over unicode, and
    # pypandoc.convert_text accepts unicode (or utf-8) and returns unicode.
    converted = pypandoc.convert_text(
        SEP.join(lines), 'rst', format=source_format)

    # Sphinx keeps a reference to 'lines', so the list is updated in place
    # rather than rebound.
    lines[:] = converted.split(SEP)
示例8
def _convert_md_table_to_rst(table):
    """Convert a markdown table to rst format"""
    if len(table) < 3:
        return ''
    out = '```eval_rst\n.. list-table::\n   :header-rows: 1\n\n'
    for i,l in enumerate(table):
        cols = l.split('|')[1:-1]
        if i == 0:
            ncol = len(cols)
        else:
            if len(cols) != ncol:
                return ''
        if i == 1:
            for c in cols:
                if len(c) is not 0 and '---' not in c:
                    return ''
        else:
            for j,c in enumerate(cols):
                out += '   * - ' if j == 0 else '     - '
                out += pypandoc.convert_text(
                    c, 'rst', format='md').replace('\n', ' ').replace('\r', '') + '\n'
    out += '```\n'
    return out 
示例9
def convert(content, from_format, to_format, use_file=False):
    """Convert ``content`` from ``from_format`` to ``to_format`` with pypandoc.

    With ``use_file`` false the value returned by ``pypandoc.convert_text``
    is returned directly. With ``use_file`` true pandoc writes to the file
    obtained from ``make_file(to_format)``; that file is read back with
    ``read_file`` and decoded as UTF-8, falling back to latin-1 when the
    bytes are not valid UTF-8.
    """
    target = make_file(to_format) if use_file else None
    converted = pypandoc.convert_text(
        content, to_format, format=from_format, outputfile=target)

    if not use_file:
        return converted

    raw = read_file(target)
    try:
        return raw.decode('UTF-8')
    except UnicodeDecodeError:
        return raw.decode('latin-1')
示例10
def __init__(self, source_data):
        """Initialise the parent class with ``source_data`` rendered as HTML.

        ``source_data`` is converted from mediawiki markup to HTML by
        pypandoc. Raises ``PypandocImportError`` when pypandoc cannot be
        imported.
        """
        try:
            import pypandoc
        except ImportError as import_error:
            # pypandoc is an optional dependency, so it may not be installed
            # on this system.
            raise PypandocImportError(import_error)

        html = pypandoc.convert_text(source_data, "html", format="mediawiki")
        super().__init__(html)
示例11
def convert_rst_to_md(text):
    """Return ``text`` converted from rst to markdown by pypandoc.

    ``--wrap=preserve`` is passed to pandoc as an extra argument.
    """
    pandoc_options = ["--wrap=preserve"]
    markdown = pypandoc.convert_text(
        text, "md", format="rst", extra_args=pandoc_options
    )
    return markdown
示例12
def html2markdown(html: str) -> str:
    """
    Returns the given HTML as equivalent Markdown-structured text.

    pypandoc is tried first. If it raises ``OSError``, a recommendation to
    install pandoc is printed and the result of ``html2text`` is returned
    instead.
    """
    try:
        return pypandoc.convert_text(html, 'md', format='html')
    except OSError:
        # The original concatenation lacked a space and printed
        # "better resultsthan".
        msg = (
            "It's recommended to install the `pandoc` library for converting "
            "HTML into Markdown-structured text. It tends to have better results "
            "than `html2text`, which is now used as a fallback."
        )
        print(msg)
        return html2text(html)
示例13
def md2rst(comment):
    """Convert a comment from protobuf markdown to restructuredtext.

    Steps:
    - Replace proto links with literals (e.g. [Foo][bar.baz.Foo] -> `Foo`)
    - Resolve relative URLs to https://cloud.google.com
    - Run pandoc to convert from markdown to restructuredtext, but only
      when the comment contains one of the characters `` ` [ ] * _ ``
    """
    comment = _replace_relative_link(_replace_proto_link(comment))

    # pypandoc.convert_text is slow, so plain comments without any markdown
    # special character skip it and are returned as they are.
    if not any(ch in comment for ch in '`[]*_'):
        return comment

    rst = pypandoc.convert_text(comment, 'rst', format='commonmark')
    # The converted text goes back into a descriptor set, where every comment
    # line is expected to start with a space separating it from the leading
    # '//'. Downstream parsing in gapic-generator strips that space, which
    # would damage the indentation of lines that really begin with one, so
    # the extra space is inserted here. Comments that skipped pandoc above
    # already carry their leading space and are left alone.
    return _insert_spaces(rst)
示例14
def read(self, contents, context=None):
        """Parse ``contents`` into an AST via pandoc's JSON output.

        ``contents`` must be a ``str``. It is converted by pypandoc from
        ``PANDOC_MARKDOWN_FORMAT`` to JSON, which ``ASTPlugin().loads`` then
        turns into the returned AST. ``context`` is not used.
        """
        assert isinstance(contents, str)
        pandoc_json = pypandoc.convert_text(
            contents, 'json', format=PANDOC_MARKDOWN_FORMAT)
        return ASTPlugin().loads(pandoc_json)
示例15
def get_pandoc_api_version():
    """Return the ``pandoc-api-version`` entry of pandoc's JSON output.

    An empty markdown document is converted to JSON and the value stored
    under the ``pandoc-api-version`` key is returned.
    """
    import pypandoc
    empty_doc_json = pypandoc.convert_text('', 'json', format='markdown')
    document = json.loads(empty_doc_json)
    return document['pandoc-api-version']
示例16
def html2markdown(html):
    """Return `html` converted by pypandoc to ``markdown_strict`` text."""
    return pypandoc.convert_text(html, 'markdown_strict', format='html')
示例17
def convert_rst_to_md(text):
    """Return ``text`` converted from rst to markdown by pypandoc.

    ``--wrap=preserve`` is passed to pandoc as an extra argument.
    """
    pandoc_options = ["--wrap=preserve"]
    markdown = pypandoc.convert_text(
        text, "md", format="rst", extra_args=pandoc_options
    )
    return markdown
示例18
def convert_rst_to_md(text):
    """Return ``text`` converted from rst to markdown by pypandoc.

    ``--wrap=preserve`` is passed to pandoc as an extra argument.
    """
    pandoc_options = ["--wrap=preserve"]
    markdown = pypandoc.convert_text(
        text, "md", format="rst", extra_args=pandoc_options
    )
    return markdown
示例19
def twlight_wikicode2html(value):
    """Return ``value`` converted by pandoc from mediawiki markup to html."""
    return pypandoc.convert_text(value, "html", format="mediawiki")
示例20
def twlight_wikicode2html(value):
    """Return ``value`` converted by pandoc from mediawiki markup to html."""
    return pypandoc.convert_text(value, "html", format="mediawiki")
示例21
def pandoc_convert(text, to="html5", args=None, outputfile=None):
    """Convert ``text`` with pandoc, always adding ``--quiet``.

    The input format is read from the ``input-format`` setting and falls
    back to ``"markdown"`` when that setting is empty.

    Parameters
    ----------
    text
        Source text handed to ``pypandoc.convert_text``.
    to
        Pandoc output format.
    args
        Optional iterable of extra pandoc arguments. It is copied, never
        modified.
    outputfile
        Forwarded to ``pypandoc.convert_text``.

    Returns whatever ``pypandoc.convert_text`` returns.
    """
    fr = Settings.new().get_value('input-format').get_string() or "markdown"
    # Build a fresh list on every call. Extending the argument in place
    # mutated the caller's list, and with the old ``args=[]`` default one
    # more "--quiet" piled up on every call.
    extra_args = list(args or ()) + ["--quiet"]
    return pypandoc.convert_text(text, to, fr, extra_args=extra_args, outputfile=outputfile)
示例22
def convert(source: str, to: str, extra_args=(),
            output_file: str=None) -> None:
    """
    Stitch a source document and render it with pandoc.

    Parameters
    ----------
    source : str
        Document handed to ``Stitch.stitch``.
    to : str
        Pandoc output format.
    extra_args : iterable
        Extra pandoc arguments. ``--standalone``, ``--self-contained`` and
        ``--use-prompt`` also configure the ``Stitch`` instance;
        ``--use-prompt`` is removed before the arguments reach pandoc.
    output_file : str
        Path to write to. Its base name without extension names the
        ``Stitch`` instance (``'std_out'`` when no file is given).

    Notes
    -----
    Either writes to ``output_file`` or prints to stdout.
    """
    if output_file is None:
        output_name = 'std_out'
    else:
        output_name = os.path.splitext(os.path.basename(output_file))[0]

    wants_standalone = '--standalone' in extra_args
    wants_self_contained = '--self-contained' in extra_args
    wants_prompt = '--use-prompt' in extra_args
    pandoc_args = [arg for arg in extra_args if arg != '--use-prompt']

    stitcher = Stitch(
        name=output_name,
        to=to,
        standalone=wants_standalone,
        self_contained=wants_self_contained,
        use_prompt=wants_prompt,
    )
    document_json = json.dumps(stitcher.stitch(source))
    rendered = pypandoc.convert_text(
        document_json, to, format='json',
        extra_args=pandoc_args, outputfile=output_file,
    )

    if output_file is None:
        print(rendered)
示例23
def tokenize(source: str) -> dict:
    """
    Return pandoc's JSON AST for ``source``, parsed as markdown.
    """
    ast_json = pypandoc.convert_text(source, 'json', 'markdown')
    return json.loads(ast_json)
示例24
def tokenize_block(source: str, pandoc_extra_args: list=None) -> list:
    """
    Return the ``blocks`` of pandoc's JSON AST for a Jupyter output.

    ``source`` is parsed as markdown. ``pandoc_extra_args`` is forwarded to
    pandoc; ``None`` means no extra arguments.
    """
    extra = [] if pandoc_extra_args is None else pandoc_extra_args
    document = json.loads(
        pypandoc.convert_text(
            source, to='json', format='markdown', extra_args=extra
        )
    )
    return document['blocks']
示例25
def as_json(document):
    """Return the parsed pandoc JSON for the markdown ``document``."""
    pandoc_json = pypandoc.convert_text(document, 'json', format='markdown')
    return json.loads(pandoc_json)
示例26
def handle_law_from_xml(self, book, book_xml) -> LawBook:
        """Create and save one ``Law`` per ``sect2`` element of a book's XML.

        Every ``sect1`` element adds a section to ``book``. Inside it, each
        ``sect2`` element has its ``title`` child removed; the remaining
        children are serialised, converted from docbook to html by pandoc
        and stored as the content of a new ``Law``. Laws are numbered
        consecutively across all sections and each one points at the law
        saved before it.

        Parameters
        ----------
        book
            Object receiving ``add_section`` calls and passed to ``Law``.
        book_xml
            XML document parsed with ``etree.fromstring``.

        Returns
        -------
        The ``book`` that was passed in.
        """
        previous_law = None
        law_order = 1

        # Parse XML tree
        tree = etree.fromstring(book_xml)

        for sect in tree.xpath('sect1'):
            section_title = sect.xpath('title/text()')[0]
            logger.debug('Section: %s', section_title)

            book.add_section(from_order=law_order, title=section_title.strip())

            for law_raw in sect.xpath('sect2'):
                # Detach the title so it is not rendered as part of the body.
                law_title = law_raw.xpath('title')[0]
                law_title.getparent().remove(law_title)

                law_docbook = '\n'.join(tostring(x).decode('utf-8') for x in law_raw.iterchildren())
                law_text = pypandoc.convert_text(law_docbook, 'html', format='docbook')
                law_section = tostring(law_title, method="text").decode('utf-8').strip()

                # NOTE(review): the original literal was the unterminated
                # `"'`; an empty title is assumed to be what was meant.
                law = Law(book=book,
                          title='',
                          section=law_section,
                          slug=slugify(law_section),
                          content=law_text,
                          previous=previous_law,
                          order=law_order
                          )
                law.save()
                law_order += 1
                previous_law = law

        return book
示例27
def render_to_format(request, format, title, template_src, context):
    """Render a template and return it as an HTTP response in ``format``.

    Parameters
    ----------
    request
        Not used by this function.
    format
        Export format; must be a key of ``dict(settings.EXPORT_FORMATS)``.
    title
        Base name of the file name in the ``Content-Disposition`` header.
    template_src
        Template name passed to ``get_template``.
    context
        Context the template is rendered with.

    Returns
    -------
    ``HttpResponseBadRequest`` for an unsupported format, an ``HttpResponse``
    holding the rendered html for ``'html'``, and otherwise an
    ``HttpResponse`` holding the document pandoc produced from that html.
    PDFs are sent without ``attachment``; other formats are attachments.
    """
    if format not in dict(settings.EXPORT_FORMATS):
        return HttpResponseBadRequest(_('This format is not supported.'))

    # render the template to a html string
    html = get_template(template_src).render(context)

    # remove empty lines
    html = os.linesep.join([line for line in html.splitlines() if line.strip()])

    if format == 'html':
        return HttpResponse(html)

    def pandoc_is_v1():
        # Several options were renamed in pandoc 2. Comparing the whole
        # major component keeps e.g. "10.x" from being mistaken for 1.x.
        return pypandoc.get_pandoc_version().split('.')[0] == '1'

    if format == 'pdf':
        engine_option = '--latex-engine=xelatex' if pandoc_is_v1() else '--pdf-engine=xelatex'
        args = ['-V', 'geometry:margin=1in', engine_option]
        content_disposition = 'filename="%s.%s"' % (title, format)
    else:
        args = []
        content_disposition = 'attachment; filename="%s.%s"' % (title, format)

    # use reference document for certain file formats
    refdoc = set_export_reference_document(format)
    if refdoc is not None and format in ('docx', 'odt'):
        if pandoc_is_v1():
            args.append('--reference-' + format + '=' + refdoc)
        else:
            args.append('--reference-doc=' + refdoc)

    # create a temporary file for pandoc to write into
    tmp_fd, tmp_filename = mkstemp('.' + format)
    try:
        # The descriptor is closed and the file removed even when pandoc
        # fails; previously both leaked on any conversion error.
        with os.fdopen(tmp_fd, 'rb') as file_handler:
            log.info("Export " + format + " document using args " + str(args))
            # convert the file using pandoc
            pypandoc.convert_text(html, format, format='html', outputfile=tmp_filename, extra_args=args)
            # read the temporary file
            file_content = file_handler.read()
    finally:
        # delete the temporary file
        os.remove(tmp_filename)

    # create the response object
    response = HttpResponse(file_content, content_type='application/%s' % format)
    response['Content-Disposition'] = content_disposition.encode('utf-8')
    return response
示例28
def gen_release_notes(path):
    """
    Generate reStructuredText files for "Release Notes". It generates 'index.rst' file and
    each rst file for each version's release note under 'whatsnew' directory.
    The contents are from Github release notes.

    The 'whatsnew' directory under ``path`` is removed and recreated first.
    """
    whatsnew_dir = "%s/whatsnew" % path
    shutil.rmtree(whatsnew_dir, ignore_errors=True)
    os.mkdir(whatsnew_dir)

    def heading(text):
        # Title with a "=" rule above and below, followed by a blank line.
        rule = "=" * len(text)
        return rule + "\n" + text + "\n" + rule + "\n" + "\n"

    # Make PR reference links pretty:
    #   ", #..." -> ", `#... <https://github.com/databricks/koalas/pull/...>`_"
    #   "(#..."  -> "(`#... <https://github.com/databricks/koalas/pull/...>`_"
    pr_link_rewrites = (
        (r', #(\d+)',
         r', `#\1 <https://github.com/databricks/koalas/pull/\1>`_'),
        (r'\(#(\d+)',
         r'(`#\1 <https://github.com/databricks/koalas/pull/\1>`_'),
    )

    with open("%s/index.rst" % whatsnew_dir, "a") as index_file:
        index_file.write(heading("Release Notes"))
        # The option follows the directive on the same line (no newline in
        # between), exactly as the file was written before.
        index_file.write(".. toctree::" + "   :maxdepth: 1" + "\n" + "\n")

        for name, tag_name, body in list_releases_to_document(ks.__version__):
            release_doc = pypandoc.convert_text(body, "rst", format="md")
            for pattern, replacement in pr_link_rewrites:
                release_doc = re.sub(pattern, replacement, release_doc)

            index_file.write("   " + tag_name + "\n" + "\n")

            with open("%s/%s.rst" % (whatsnew_dir, tag_name), "a") as release_file:
                release_file.write(heading(name))
                release_file.write(release_doc + "\n" + "\n")
示例29
def convert(self, text):
        """Convert Redmine-flavoured textile to markdown.

        Code blocks and collapse sections are rewritten to html first, the
        text is converted by pandoc from ``textile`` to ``markdown_strict``,
        and Redmine constructs pandoc does not handle (wiki links, nested
        lists, blockquotes, macros, attachments, code highlighting) are
        fixed up afterwards using the patterns stored on ``self``.

        Returns the markdown text, or ``False`` when a ``RuntimeError`` is
        raised during the pandoc conversion or the fix-ups.
        """
        def sub_all(pattern, repl, source):
            # re.sub's fourth positional parameter is ``count``, not
            # ``flags``: the original calls silently capped each rewrite at
            # 24 replacements and never applied MULTILINE | DOTALL. A
            # compiled pattern already carries its flags (and rejects a
            # flags argument), so flags are only passed for string patterns.
            if isinstance(pattern, str):
                return re.sub(pattern, repl, source, flags=re.MULTILINE | re.DOTALL)
            return pattern.sub(repl, source)

        text = '\n\n'.join([re.sub(self.regexCodeBlock, r'<pre>\1</pre>', block) for block in text.split('\n\n')])

        for collapse in re.findall(self.regexCollapse, text):
            text = text.replace(collapse[0], "<details>")
            text = text.replace(collapse[2], "<summary>{}</summary> \n\n{}".format(collapse[1], collapse[2]))
            text = text.replace(collapse[3], "</details>")
        text = re.sub(self.regexParagraph, "", text)

        # convert from textile to markdown
        try:
            text = pypandoc.convert_text(text, 'markdown_strict', format='textile')

            # pandoc does not convert everything, notably the [[link|text]] syntax
            # is not handled. So let's fix that.

            # [[ wikipage | link_text ]] -> [link_text](wikipage)
            text = sub_all(self.regexWikiLinkWithText, self.wiki_link, text)

            # [[ link_url ]] -> [link_url](link_url)
            text = sub_all(self.regexWikiLinkWithoutText, self.wiki_link, text)

            # nested lists, fix at least the common issues
            text = text.replace("    \\#\\*", "    -")
            text = text.replace("    \\*\\#", "    1.")

            # Redmine is using '>' for blockquote, which is not textile
            text = text.replace("&gt; ", ">")

            # wiki note macros
            text = sub_all(self.regexTipMacro, r'---\n**TIP**: \1\n---\n', text)
            text = sub_all(self.regexNoteMacro, r'---\n**NOTE**: \1\n---\n', text)
            text = sub_all(self.regexWarningMacro, r'---\n**WARNING**: \1\n---\n', text)
            text = sub_all(self.regexImportantMacro, r'---\n**IMPORTANT**: \1\n---\n', text)

            # all other macros
            text = sub_all(self.regexAnyMacro, r'\1', text)

            # attachments in notes
            text = sub_all(self.regexAttachment, r"\n\n*(Merged from Redmine, please check first note for attachment named **\1**)*", text)

            # code highlight
            for highlight in re.findall(self.regexCodeHighlight, text):
                text = text.replace(highlight[0], "\n```{}".format(highlight[2].lower()))
                text = text.replace(highlight[3], "\n```")
        except RuntimeError:
            return False
        return text
示例30
def convert_ipynb_to_gallery(nb, new_file):
    """Convert a Jupyter notebook into a gallery-style Python script.

    The first cell supplies the module docstring: a markdown cell
    contributes the rst conversion of its first source line, any other
    cell type contributes the notebook's file name without extension and
    is then emitted as code. Every later markdown cell becomes a ``# %%``
    block of ``# ``-prefixed rst, and every later code cell a ``# %% ``
    block. Lines starting with ``%`` are commented out.

    Parameters
    ----------
    nb
        Path of the notebook (JSON) file to read.
    new_file
        Path of the Python file to write.
    """
    with open(nb, encoding="utf8", errors='ignore') as nb_file:
        nb_dict = json.load(nb_file)
    cells = nb_dict['cells']

    python_file = ""
    for i, cell in enumerate(cells):
        if i == 0:
            if cell['cell_type'] != 'markdown':
                # Title falls back to the notebook's own file name. The
                # original read an undefined ``file_name`` and sliced off
                # five characters, which leaves the dot of ".ipynb".
                rst_source = os.path.splitext(os.path.basename(nb))[0]
                rst_source = bytes(rst_source, 'utf-8').decode('utf-8', 'ignore')
                python_file = '"""\n' + rst_source + '\n"""'
                source = ''.join(cell['source'])
                python_file = python_file + '\n' * 2 + source

            else:
                # NOTE(review): the prints below look like leftover debug
                # output; kept so stdout is unchanged.
                b = cell['source']
                print(b)
                # Only the first source line of the cell is used here.
                a = bytes(cell['source'][0], 'utf-8').decode('utf-8', 'ignore')
                print(a)
                md_source = ''.join(a)
                rst_source = pdoc.convert_text(md_source, 'rst', 'md')
                print(rst_source)
                rst_source = bytes(rst_source, 'utf-8').decode('utf-8', 'ignore')
                python_file = '"""\n' + rst_source + '\n"""'
        else:
            if cell['cell_type'] == 'markdown':
                md_source = ''.join(cell['source'])
                rst_source = pdoc.convert_text(md_source, 'rst', 'md')
                rst_source = rst_source.encode().decode('utf-8', 'ignore')
                commented_source = '\n'.join(['# ' + x for x in
                                              rst_source.split('\n')])
                python_file = python_file + '\n\n\n' + '# %%' + '\n' + \
                              commented_source

            elif cell['cell_type'] == 'code':
                source = ''.join(cell['source'])
                python_file = python_file + '\n' * 2 + '# %% \n' + source

    # IPython magics are not valid Python, so they are commented out.
    python_file = python_file.replace("\n%", "\n# %")
    with open(new_file, 'w', newline='', errors='ignore') as out_file:
        out_file.write(python_file)

#%%