learnxinyminutes-docs/lint/frontmatter.py

121 lines
4.1 KiB
Python
Raw Normal View History

2024-12-18 16:38:58 +00:00
#!/usr/bin/env python3
import re
from pathlib import Path
import yaml
import yamllint.config
import yamllint.linter
# Front matter must begin on the first line of the file. Group 1 captures the
# opening "---" line plus the YAML body; the closing "---" line is excluded.
# The closing delimiter may carry trailing blanks or sit at end of file.
_FRONT_MATTER_RE = re.compile(
    r"^(---\s*\n.*?\n)---[ \t]*(?:\n|\Z)", re.DOTALL
)


def extract_yaml_frontmatter(file_path):
    """Extract the YAML front matter from a Markdown file.

    Args:
        file_path: Path of the file to read; it is decoded as UTF-8.

    Returns:
        The front matter text, including the opening ``---`` line but not the
        closing one, or ``None`` when the file does not start with a front
        matter section.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        content = file.read()
    # The closing delimiter is matched as leniently as the opening one, so a
    # trailing space after "---" cannot push the lazy match on to a later
    # "---" line in the Markdown body.
    found = _FRONT_MATTER_RE.match(content)
    return found.group(1) if found else None
# yamllint configuration used for every front matter block: the "default"
# preset with the rules listed below switched off.
yaml_config = yamllint.config.YamlLintConfig(
    """{
    extends: default,
    rules: {
        commas: disable,
        trailing-spaces: disable,
        indentation: disable,
        line-length: disable,
        empty-lines: disable
    }
}"""
)
def lint_yaml(yaml_content):
    """Run yamllint over a YAML string and return its findings as text.

    The content is checked with ``yamllint.linter.run`` using the module-level
    ``yaml_config``. Every reported problem becomes one line of the form
    ``line:column description (rule)``.

    Returns:
        The problem lines joined by newlines, or an empty string when the
        linter reports nothing.
    """
    return "\n".join(
        f"{issue.line}:{issue.column} {issue.desc} ({issue.rule})"
        for issue in yamllint.linter.run(yaml_content, yaml_config)
    )
def validate_yaml_keys(yaml_content, allowed_keys):
    """Check front matter keys and value types against an allow-list.

    Args:
        yaml_content: YAML text to parse with ``yaml.safe_load``.
        allowed_keys: Mapping of permitted key name to the type its value
            must be an instance of.

    Returns:
        A message describing the first problem found, or an empty string when
        the content is valid. Problems are checked in this order: YAML parse
        error, empty document, non-mapping document, keys outside
        ``allowed_keys``, then per-key type errors. Every list-valued key must
        hold non-empty lists of one or two strings.
    """
    # Only the parse step can raise YAMLError, so keep the try body minimal.
    try:
        data = yaml.safe_load(yaml_content)
    except yaml.YAMLError as e:
        return f"Error parsing YAML: {e}"

    if not data:
        return "Empty YAML front matter."
    # A top-level list or scalar has no keys to validate; report it rather
    # than failing on data.keys().
    if not isinstance(data, dict):
        return (
            "Invalid front matter: expected a mapping, "
            f"got {type(data).__name__}"
        )

    extra_keys = set(data) - set(allowed_keys)
    if extra_keys:
        # YAML keys need not be strings; stringify before joining, and sort so
        # the message is stable between runs.
        return f"Invalid keys found: {', '.join(sorted(map(str, extra_keys)))}"

    for key, value_type in allowed_keys.items():
        if key not in data:
            continue
        value = data[key]
        if not isinstance(value, value_type):
            return f"Invalid type for key '{key}': expected {value_type.__name__}, got {type(value).__name__}"
        if not isinstance(value, list):
            continue
        for item in value:
            if not isinstance(item, list):
                return f"Invalid type for item in key '{key}': expected list, got {type(item).__name__}"
            elif not item:
                return f"Invalid item in key '{key}': found empty list"
            elif not isinstance(item[0], str):
                return f"Invalid type for item[0] in key '{key}': expected str, got {type(item[0]).__name__}"
            elif len(item) == 2 and not isinstance(item[1], str):
                return f"Invalid type for item[1] in key '{key}': expected str, got {type(item[1]).__name__}"
            elif len(item) > 2:
                return f"Invalid length for item in key '{key}': expected 1 or 2, got {len(item)}"
    return ""
def process_files(path):
    """Lint the front matter of one file, or of every ``*.md`` file under a directory.

    For each file that has front matter, the yamllint findings and the key
    validation message are collected. Files with any finding are printed as
    the file path followed by the findings, with a blank line between
    consecutive reports. Files without front matter are skipped.

    Args:
        path: A ``pathlib.Path`` pointing at a file or a directory.

    Returns:
        ``True`` if at least one file produced a finding, otherwise ``False``.
    """
    allowed_keys = {
        "name": str,
        "where_x_eq_name": str,
        "category": str,
        "filename": str,
        "contributors": list,
        "translators": list,
    }
    candidates = path.rglob("*.md") if path.is_dir() else [path]

    has_error = False
    for candidate in candidates:
        front_matter = extract_yaml_frontmatter(candidate)
        if not front_matter:
            continue

        lint_result = lint_yaml(front_matter)
        key_validation = validate_yaml_keys(front_matter, allowed_keys)
        if not (lint_result or key_validation):
            continue

        # Separate reports with a blank line, but not before the first one.
        if has_error:
            print()
        print(candidate)
        for message in (lint_result, key_validation):
            if message:
                print(message)
        has_error = True
    return has_error
def main(path_input):
    """Lint the file or directory named by ``path_input``.

    Args:
        path_input: Path string of a file or directory.

    Returns:
        ``1`` after printing an error when the path does not exist; otherwise
        the result of ``process_files`` for that path.
    """
    path = Path(path_input)
    if path.exists():
        return process_files(path)
    print(f"Error: {path_input} does not exist.")
    return 1
if __name__ == "__main__":
    import sys

    # Lint the path given as the first command-line argument, defaulting to
    # the current directory; exit with status 1 if anything was reported.
    cli_args = sys.argv[1:]
    target = cli_args[0] if cli_args else "."
    sys.exit(1 if main(target) else 0)