From 3d8a8213c38f6885d92d512bd3f9e2eed6ec6808 Mon Sep 17 00:00:00 2001
From: Boris Verkhovskiy
Date: Wed, 18 Dec 2024 09:04:26 -0700
Subject: [PATCH] Lint frontmatter

---
 .github/workflows/lint.yml |  12 +++-
 lint/encoding.sh           |  30 ++++++++++
 lint/frontmatter.py        | 120 +++++++++++++++++++++++++++++++++++++
 lint/requirements.txt      |   2 +
 tests/encoding.rb          |  32 ----------
 tests/yaml.rb              |  21 -------
 6 files changed, 163 insertions(+), 54 deletions(-)
 create mode 100755 lint/encoding.sh
 create mode 100755 lint/frontmatter.py
 create mode 100644 lint/requirements.txt
 delete mode 100644 tests/encoding.rb
 delete mode 100644 tests/yaml.rb

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 4db0c300..5ab9bbdc 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -11,8 +11,18 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+      - run: pip install -r lint/requirements.txt
       - uses: ruby/setup-ruby@v1
         with:
           ruby-version: '3.2'
       - run: gem install mdl
-      - run: mdl . --ignore-front-matter -r MD003,MD011,MD023,MD027,MD028,MD035,MD037,MD038,MD039,MD047
+
+      - name: Files are UTF-8
+        run: ./lint/encoding.sh .
+      - name: Lint Markdown
+        run: mdl . --ignore-front-matter -r MD003,MD011,MD023,MD027,MD028,MD035,MD037,MD038,MD039,MD047
+      - name: Lint frontmatter
+        run: ./lint/frontmatter.py .
diff --git a/lint/encoding.sh b/lint/encoding.sh
new file mode 100755
index 00000000..5fe3e1a7
--- /dev/null
+++ b/lint/encoding.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+check_encoding() {
+    file="$1"
+    encoding=$(file -b --mime-encoding "$file")
+
+    # Check if the encoding is neither UTF-8 nor US-ASCII
+    if [[ "$encoding" != "utf-8" && "$encoding" != "us-ascii" ]]; then
+        # Print the file path and encoding
+        echo "Error: $file has encoding $encoding, which is not utf-8 or us-ascii"
+        return 1
+    fi
+
+    # Check for UTF-8 BOM
+    if [[ "$encoding" == "utf-8" ]]; then
+        if head -c 3 "$file" | cmp -s <(echo -ne '\xEF\xBB\xBF'); then
+            echo "Error: $file contains a UTF-8 BOM"
+            return 1
+        fi
+    fi
+
+    return 0
+}
+
+export -f check_encoding
+
+# Default to current directory if no argument is given
+directory="${1:-.}"
+
+find "$directory" -type f -name "*.md" -print0 | xargs -0 -P 8 -I {} bash -c 'check_encoding "$@"' _ {}
diff --git a/lint/frontmatter.py b/lint/frontmatter.py
new file mode 100755
index 00000000..972227ea
--- /dev/null
+++ b/lint/frontmatter.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+
+import re
+from pathlib import Path
+import yaml
+import yamllint.config
+import yamllint.linter
+
+
+def extract_yaml_frontmatter(file_path):
+    """Extracts YAML front matter from a Markdown file."""
+    with open(file_path, "r", encoding="utf-8") as file:
+        content = file.read()
+    matches = re.match(r"^(---\s*\n.*?\n)---\n", content, re.DOTALL)
+    if matches:
+        return matches.group(1)
+    return None
+
+
+yaml_config = yamllint.config.YamlLintConfig(
+    """{
+        extends: relaxed,
+        rules: {
+            commas: disable,
+            trailing-spaces: disable,
+            indentation: disable,
+            line-length: disable,
+            empty-lines: disable
+        }
+    }"""
+)
+
+
+def lint_yaml(yaml_content):
+    """Lints YAML content using yamllint by sending it to stdin."""
+    problems = []
+    for p in yamllint.linter.run(yaml_content, yaml_config):
+        problems.append(f"{p.line}:{p.column} {p.desc} ({p.rule})")
+    return "\n".join(problems)
+
+
+def validate_yaml_keys(yaml_content, allowed_keys):
+    """Validates that the YAML content contains only the specified keys."""
+    try:
+        data = yaml.safe_load(yaml_content)
+        if not data:
+            return "Empty YAML front matter."
+        extra_keys = set(data.keys()) - set(allowed_keys)
+        if extra_keys:
+            return f"Invalid keys found: {', '.join(extra_keys)}"
+        for key, value_type in allowed_keys.items():
+            if key in data:
+                if not isinstance(data[key], value_type):
+                    return f"Invalid type for key '{key}': expected {value_type.__name__}, got {type(data[key]).__name__}"
+                if isinstance(data[key], list):
+                    for item in data[key]:
+                        if not isinstance(item, list):
+                            return f"Invalid type for item in key '{key}': expected list, got {type(item).__name__}"
+                        elif not item:
+                            return f"Invalid item in key '{key}': found empty list"
+                        elif not isinstance(item[0], str):
+                            return f"Invalid type for item[0] in key '{key}': expected str, got {type(item[0]).__name__}"
+                        elif len(item) == 2 and not isinstance(item[1], str):
+                            return f"Invalid type for item[1] in key '{key}': expected str, got {type(item[1]).__name__}"
+                        elif len(item) > 2:
+                            return f"Invalid length for item in key '{key}': expected 1 or 2, got {len(item)}"
+    except yaml.YAMLError as e:
+        return f"Error parsing YAML: {e}"
+    return ""
+
+
+def process_files(path):
+    """Processes either a single file or all Markdown files in a directory."""
+    if path.is_dir():
+        pathlist = path.rglob("*.md")
+    else:
+        pathlist = [path]
+
+    has_error = False
+    allowed_keys = {
+        "name": str,
+        "where_x_eq_name": str,
+        "category": str,
+        "filename": str,
+        "contributors": list,
+        "translators": list,
+    }
+    for path in pathlist:
+        yaml_content = extract_yaml_frontmatter(path)
+        if yaml_content:
+            lint_result = lint_yaml(yaml_content)
+            key_validation = validate_yaml_keys(yaml_content, allowed_keys)
+            if lint_result or key_validation:
+                if has_error:  # don't prepend newline to first error
+                    print()
+                print(path)
+                if lint_result:
+                    print(lint_result)
+                if key_validation:
+                    print(key_validation)
+                has_error = True
+    return has_error
+
+
+def main(path_input):
+    """Determines if the input is a directory or a file and processes accordingly."""
+    path = Path(path_input)
+    if not path.exists():
+        print(f"Error: {path_input} does not exist.")
+        return 1
+
+    return process_files(path)
+
+
+if __name__ == "__main__":
+    import sys
+
+    path_input = sys.argv[1] if len(sys.argv) > 1 else "."
+    has_error = main(path_input)
+    sys.exit(1 if has_error else 0)
diff --git a/lint/requirements.txt b/lint/requirements.txt
new file mode 100644
index 00000000..29c0e827
--- /dev/null
+++ b/lint/requirements.txt
@@ -0,0 +1,2 @@
+yamllint
+pyyaml
diff --git a/tests/encoding.rb b/tests/encoding.rb
deleted file mode 100644
index a0b3b184..00000000
--- a/tests/encoding.rb
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env ruby
-require 'charlock_holmes'
-$file_count = 0;
-markdown_files = Dir["./**/*.html.markdown"]
-markdown_files.each do |file|
-  begin
-    contents = File.read(file)
-    detection = CharlockHolmes::EncodingDetector.detect(contents)
-    case detection[:encoding]
-    when 'UTF-8'
-      $file_count = $file_count + 1
-    when 'ISO-8859-1'
-      $file_count = $file_count + 1
-    when /ISO-8859/
-      puts "Notice: #{file} was detected as #{detection[:encoding]} encoding. Everything is probably fine."
-      $file_count = $file_count + 1
-    else
-      puts "WARNING #{file} was detected as #{detection[:encoding]} encoding. Please save the file in UTF-8!"
-    end
-  rescue Exception => msg
-    puts msg
-  end
-end
-files_failed = markdown_files.length - $file_count
-if files_failed != 0
-  puts "FAILURE!!! #{files_failed} files were unable to be validated as UTF-8!"
-  puts "Please resave the file as UTF-8."
-  exit 1
-else
-  puts "Success. All #{$file_count} files passed UTF-8 validity checks."
-  exit 0
-end
diff --git a/tests/yaml.rb b/tests/yaml.rb
deleted file mode 100644
index 0ed918e0..00000000
--- a/tests/yaml.rb
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env ruby
-require 'yaml';
-$file_count = 0;
-markdown_files = Dir["./**/*.html.markdown"]
-markdown_files.each do |file|
-  begin
-    YAML.load_file(file)
-    $file_count = $file_count + 1
-  rescue Exception => msg
-    puts msg
-  end
-end
-files_failed = markdown_files.length - $file_count
-if files_failed != 0
-  puts "FAILURE!!! #{files_failed} files were unable to be parsed!"
-  puts "Please check the YAML headers for the documents that failed!"
-  exit 1
-else
-  puts "All #{$file_count} files were verified valid YAML"
-  exit 0
-end