A Markata plugin to create automatic descriptions for markdown documents. It
does this by grabbing the first {len} number of characters from the document
that are in a paragraph.
Configuration
Open up your markata.toml file and add new entries for your
auto_descriptions. You can have multiple descriptions; each one will be named
after the key you give it in your config.
[markata]
# make sure it's in your list of hooks
hooks=[
"markata.plugins.auto_description",
]
[markata.auto_description.description]
len=160
[markata.auto_description.long_description]
len=250
[markata.auto_description.super_description]
len=500
Note
Make sure that you have the auto_description plugin in your configured hooks.
In the above we will end up with three different descriptions
(description, long_description, and super_description); each will contain the
first `len` characters of the document, as specified in the config.
Defaults
By default markata will set description to 160 and long_description to 250,
if they are not set in your config.
Using the Description
Downstream hooks can now use the description for things such as SEO or feeds. Here is a simple example that lists the descriptions of all posts. This is a handy thing you can do right from a REPL.
from markata import Markata
m = Markata()
[p["description"] for p in m.articles]
get_description function
Get the full-length description for a single post using the commonmark parser. Only paragraph nodes will count as text towards the description.
get_description source
def get_description(article: "Post") -> str:
    """
    Get the full-length description for a single post using the commonmark
    parser. Only paragraph nodes will count as text towards the description.

    For each paragraph only the literal text of its first child node is used;
    paragraphs whose first child has no literal text (or that have no child
    at all) are skipped. The collected pieces are joined with single spaces.

    Args:
        article: the post to describe; only `article.content` is read.

    Returns:
        The concatenated paragraph text, or an empty string when no paragraph
        contributes any text.
    """
    ast = _parser.parse(article.content)

    # The walker visits every container node twice, yielding (node, entering)
    # once on the way in and once on the way out. Keeping only the "entering"
    # visit yields each paragraph exactly once, in document order, without
    # having to dedupe on source position afterwards.
    paragraphs = [
        node.first_child.literal
        for node, entering in ast.walker()
        if entering
        and node.t == "paragraph"
        and node.first_child is not None
        and node.first_child.literal is not None
    ]
    return " ".join(paragraphs)
set_description function
For a given article, find the description, put it in the cache, and set
the configured descriptions for the article.
set_description source
def set_description(
    markata: "Markata",
    article: "Post",
    cache: "FanoutCache",
    config: Dict,
    max_description: int = 500,
    plugin_text: str = "",
) -> None:
    """
    For a given `article`, find the description, put it in the cache, and set
    the configured descriptions for the article.

    Args:
        markata: provides `make_hash`, used to build the cache key.
        article: the post to describe; `article.content` is read and
            `article.metadata` is updated in place.
        cache: cache that the full description is read from and written to.
        config: plugin config. `cache_expire` is used as the cache expiry;
            every other entry that is a table with a `len` names a metadata
            key to fill with the first `len` characters of the description.
        max_description: number of characters of the description to compute
            and cache; should be at least the largest configured `len`.
        plugin_text: extra text mixed into the cache key (the caller passes
            the plugin's own source so that editing the plugin invalidates
            previously cached descriptions).
    """
    key = markata.make_hash(
        article.content,
        plugin_text,
        config,
    )

    description = cache.get(key)
    if description is None:
        description = get_description(article)[:max_description]
        # Write through the same cache object that was just read, so that a
        # caller-supplied cache is used consistently for both operations.
        cache.add(key, description, expire=config["cache_expire"])

    for description_key, settings in config.items():
        if description_key in ("cache_expire", "config_key"):
            continue
        try:
            length = settings["len"]
        except (KeyError, TypeError):
            # Not a description table (e.g. an unrelated scalar setting);
            # pre_render ignores such entries as well.
            continue
        # overwrites missing (None) and empty ('')
        if not article.metadata.get(description_key):
            article.metadata[description_key] = description[:length]
pre_render function
The Markata hook that will set descriptions for all posts in the pre-render phase.
pre_render source
def pre_render(markata: "Markata") -> None:
    """
    The Markata hook that will set descriptions for all posts in the pre-render phase.

    Fills in the default lengths for `description` (160) and
    `long_description` (250) when those keys are absent from the plugin
    config, works out the longest configured length, and then calls
    `set_description` for every article.
    """
    config = markata.get_plugin_config(__file__)

    # Defaults are applied only when the key is missing entirely; a key the
    # user configured is left untouched.
    if "description" not in config:
        config["description"] = {"len": 160}
    if "long_description" not in config:
        config["long_description"] = {"len": 250}

    def try_config_get(key: str) -> Any:
        # Entries that are not tables (no `.get`) or have no/zero `len`
        # yield None and are ignored below.
        try:
            return config.get(key).get("len") or None
        except AttributeError:
            return None

    # Longest requested description. `default=0` keeps this from raising
    # when no entry provides a usable `len`.
    max_description = max(
        (
            value
            for description_key in config
            if (value := try_config_get(description_key))
        ),
        default=0,
    )

    # Read the plugin source once rather than once per article; it is only
    # used as part of the cache key.
    plugin_text = Path(__file__).read_text()

    with markata.cache as cache:
        for article in markata.iter_articles("setting auto description"):
            set_description(
                markata=markata,
                article=article,
                cache=cache,
                config=config,
                max_description=max_description,
                plugin_text=plugin_text,
            )
try_config_get function
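Look up the `len` configured for a description key. Returns None when the key is missing, is not a table, or has no (or a zero) `len`.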
try_config_get source
def try_config_get(key: str) -> Any:
    """
    Return the `len` configured under `key`, or None when it is unavailable.

    None is returned when `key` is absent, when its entry has no `.get`
    (i.e. it is not a table), or when the `len` value is missing or falsy.
    """
    try:
        length = config.get(key).get("len")
        if length:
            return length
    except AttributeError:
        pass
    return None