Lint frontmatter

This commit is contained in:
Boris Verkhovskiy 2024-12-18 09:04:26 -07:00
parent 4d7ecfbbf7
commit 3d8a8213c3
6 changed files with 163 additions and 54 deletions

View File

@ -11,8 +11,18 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.13'
- run: pip install -r lint/requirements.txt
- uses: ruby/setup-ruby@v1
with:
ruby-version: '3.2'
- run: gem install mdl
# Markdown is linted exactly once, by the named "Lint Markdown" step below;
# the former unnamed `mdl` step ran the identical command a second time.
- name: Files are UTF-8
  run: ./lint/encoding.sh .
- name: Lint Markdown
  run: mdl . --ignore-front-matter -r MD003,MD011,MD023,MD027,MD028,MD035,MD037,MD038,MD039,MD047
- name: Lint frontmatter
run: ./lint/frontmatter.py .

30
lint/encoding.sh Executable file
View File

@ -0,0 +1,30 @@
#!/bin/bash
# Verifies that one file is encoded as UTF-8 (or plain US-ASCII) and does not
# start with a UTF-8 byte order mark.
#
# Arguments:
#   $1 - path of the file to check
# Outputs:
#   Writes an "Error: ..." line to stdout when the file fails a check.
# Returns:
#   0 when the file passes, 1 otherwise.
check_encoding() {
    # Keep both variables local so the function does not overwrite same-named
    # variables in whichever shell calls it.
    local file="$1"
    local encoding
    # Assigned separately from the declaration so `local` cannot mask the
    # exit status of the command substitution.
    encoding=$(file -b --mime-encoding "$file")

    # Check if the encoding is neither UTF-8 nor US-ASCII
    if [[ "$encoding" != "utf-8" && "$encoding" != "us-ascii" ]]; then
        # Print the file path and encoding
        echo "Error: $file has encoding $encoding, which is not utf-8 or us-ascii"
        return 1
    fi

    # Check for UTF-8 BOM: compare the first three bytes (stdin, "-") with EF BB BF
    if [[ "$encoding" == "utf-8" ]]; then
        if head -c 3 "$file" | cmp -s - <(printf '\xEF\xBB\xBF'); then
            echo "Error: $file contains a UTF-8 BOM"
            return 1
        fi
    fi

    return 0
}
# Make the function visible to the `bash -c` child shells started by xargs.
export -f check_encoding
# Default to current directory if no argument is given
directory="${1:-.}"
# Fail the script when `find` itself fails (for example, the directory does
# not exist). Without pipefail only the exit status of xargs is reported, and
# xargs exits 0 when it receives no input, so the check would pass silently.
set -o pipefail
# Check every Markdown file, up to 8 in parallel; the file path is passed as a
# positional argument ("$@") rather than spliced into the command string.
find "$directory" -type f -name "*.md" -print0 | xargs -0 -P 8 -I {} bash -c 'check_encoding "$@"' _ {}

120
lint/frontmatter.py Executable file
View File

@ -0,0 +1,120 @@
#!/usr/bin/env python3
import re
from pathlib import Path
import yaml
import yamllint.config
import yamllint.linter
def extract_yaml_frontmatter(file_path):
    """Extracts YAML front matter from a Markdown file.

    The front matter is the text between a leading ``---`` line and the next
    line consisting of ``---``. The closing delimiter may carry trailing
    spaces or tabs and may be the last line of the file without a final
    newline, so such files are linted instead of being silently skipped.

    Args:
        file_path: Path of the Markdown file; it is read as UTF-8.

    Returns:
        The front matter including the opening ``---`` line but not the
        closing one, or None when the file does not start with front matter.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        content = file.read()
    # DOTALL lets ".*?" span several lines; the lazy quantifier stops at the
    # first closing delimiter line.
    matches = re.match(r"^(---\s*\n.*?\n)---[ \t]*(?:\n|$)", content, re.DOTALL)
    if matches:
        return matches.group(1)
    return None
# yamllint configuration used by lint_yaml(): it extends yamllint's "relaxed"
# preset and disables the commas, trailing-spaces, indentation, line-length
# and empty-lines rules.
yaml_config = yamllint.config.YamlLintConfig(
"""{
extends: relaxed,
rules: {
commas: disable,
trailing-spaces: disable,
indentation: disable,
line-length: disable,
empty-lines: disable
}
}"""
)
def lint_yaml(yaml_content):
    """Runs yamllint over a YAML string and reports what it finds.

    Args:
        yaml_content: The YAML text to lint.

    Returns:
        One "line:column description (rule)" entry per problem, joined with
        newlines; an empty string when yamllint reports nothing.
    """
    return "\n".join(
        f"{problem.line}:{problem.column} {problem.desc} ({problem.rule})"
        for problem in yamllint.linter.run(yaml_content, yaml_config)
    )
def validate_yaml_keys(yaml_content, allowed_keys):
    """Validates that the YAML content contains only the specified keys.

    Args:
        yaml_content: YAML text to parse and validate.
        allowed_keys: Mapping of permitted key name to the type its value
            must have.

    Returns:
        A message describing the first problem found, or an empty string
        when the content is valid. Parse errors are reported as a message
        rather than raised.
    """
    try:
        data = yaml.safe_load(yaml_content)
    except yaml.YAMLError as e:
        return f"Error parsing YAML: {e}"

    if not data:
        return "Empty YAML front matter."
    # A top-level list or scalar has no keys(); report it instead of crashing.
    if not isinstance(data, dict):
        return f"Invalid YAML front matter: expected a mapping, got {type(data).__name__}"

    extra_keys = set(data.keys()) - set(allowed_keys)
    if extra_keys:
        # YAML keys need not be strings; str() keeps join() from raising and
        # sorting makes the message stable between runs.
        return f"Invalid keys found: {', '.join(sorted(map(str, extra_keys)))}"

    for key, value_type in allowed_keys.items():
        if key not in data:
            continue
        if not isinstance(data[key], value_type):
            return f"Invalid type for key '{key}': expected {value_type.__name__}, got {type(data[key]).__name__}"
        if isinstance(data[key], list):
            # Each entry of a list value must itself be a list holding one
            # or two strings.
            for item in data[key]:
                if not isinstance(item, list):
                    return f"Invalid type for item in key '{key}': expected list, got {type(item).__name__}"
                elif not item:
                    return f"Invalid item in key '{key}': found empty list"
                elif not isinstance(item[0], str):
                    return f"Invalid type for item[0] in key '{key}': expected str, got {type(item[0]).__name__}"
                elif len(item) == 2 and not isinstance(item[1], str):
                    return f"Invalid type for item[1] in key '{key}': expected str, got {type(item[1]).__name__}"
                elif len(item) > 2:
                    return f"Invalid length for item in key '{key}': expected 1 or 2, got {len(item)}"
    return ""
def process_files(path):
    """Lints the front matter of one Markdown file or of a whole directory tree.

    Args:
        path: A pathlib.Path. A directory is searched recursively for "*.md"
            files; anything else is treated as a single file.

    Returns:
        True when at least one file produced a lint or validation message,
        False otherwise. Files without front matter are skipped.
    """
    allowed_keys = {
        "name": str,
        "where_x_eq_name": str,
        "category": str,
        "filename": str,
        "contributors": list,
        "translators": list,
    }
    candidates = path.rglob("*.md") if path.is_dir() else [path]

    found_problem = False
    for md_path in candidates:
        front_matter = extract_yaml_frontmatter(md_path)
        if not front_matter:
            continue

        # Lint findings are printed before key-validation findings.
        reports = [
            lint_yaml(front_matter),
            validate_yaml_keys(front_matter, allowed_keys),
        ]
        messages = [report for report in reports if report]
        if not messages:
            continue

        if found_problem:  # blank line between files, but not before the first
            print()
        print(md_path)
        for message in messages:
            print(message)
        found_problem = True

    return found_problem
def main(path_input):
    """Checks that the given path exists and lints the front matter under it.

    Args:
        path_input: File or directory path as a string.

    Returns:
        1 when the path does not exist (after printing an error), otherwise
        whatever process_files() returns for the path.
    """
    target = Path(path_input)
    if target.exists():
        return process_files(target)
    print(f"Error: {path_input} does not exist.")
    return 1
if __name__ == "__main__":
    import sys

    # Lint the path given as the first argument, or the current directory
    # when none is given; exit with status 1 if anything was reported.
    cli_args = sys.argv[1:]
    target = cli_args[0] if cli_args else "."
    sys.exit(1 if main(target) else 0)

2
lint/requirements.txt Normal file
View File

@ -0,0 +1,2 @@
yamllint
pyyaml

View File

@ -1,32 +0,0 @@
#!/usr/bin/env ruby
require 'charlock_holmes'
# Checks that every *.html.markdown file below the current directory is
# detected as UTF-8. ISO-8859 detections are tolerated (ISO-8859-1 silently,
# other variants with a notice). Exits 0 when every file passes, 1 otherwise.
markdown_files = Dir["./**/*.html.markdown"]

# A file counts as passed only when detection succeeds and reports an accepted
# encoding; a local counter replaces the former global variable.
passed_count = markdown_files.count do |file|
  begin
    detection = CharlockHolmes::EncodingDetector.detect(File.read(file))
    case detection[:encoding]
    when 'UTF-8', 'ISO-8859-1'
      true
    when /ISO-8859/
      puts "Notice: #{file} was detected as #{detection[:encoding]} encoding. Everything is probably fine."
      true
    else
      puts "WARNING #{file} was detected as #{detection[:encoding]} encoding. Please save the file in UTF-8!"
      false
    end
  rescue StandardError => e
    # StandardError, not Exception: Ctrl-C and SystemExit must still stop the
    # run instead of being counted as a failed file.
    puts e
    false
  end
end

files_failed = markdown_files.length - passed_count
if files_failed.zero?
  puts "Success. All #{passed_count} files passed UTF-8 validity checks."
  exit 0
else
  puts "FAILURE!!! #{files_failed} files were unable to be validated as UTF-8!"
  puts "Please resave the file as UTF-8."
  exit 1
end

View File

@ -1,21 +0,0 @@
#!/usr/bin/env ruby
require 'yaml';
# Checks that every *.html.markdown file below the current directory can be
# loaded by the YAML parser. Exits 0 when every file loads, 1 otherwise.
markdown_files = Dir["./**/*.html.markdown"]

# Count the files that load without error; a local counter replaces the former
# global variable.
parsed_count = markdown_files.count do |file|
  begin
    YAML.load_file(file)
    true
  rescue StandardError => e
    # StandardError, not Exception: Ctrl-C and SystemExit must still stop the
    # run instead of being counted as a failed file.
    puts e
    false
  end
end

files_failed = markdown_files.length - parsed_count
if files_failed.zero?
  puts "All #{parsed_count} files were verified valid YAML"
  exit 0
else
  puts "FAILURE!!! #{files_failed} files were unable to be parsed!"
  puts "Please check the YAML headers for the documents that failed!"
  exit 1
end