Use charlock_holmes to do encoding detection. In my tests, it correctly identified the incorrect encodings that were present in older commits. This will help ensure that this doesn't happen again by giving people instant feedback and allowing all pull requests to be checked.

This commit is contained in:
Samantha McVey 2016-11-09 20:57:13 -08:00
parent 58ffb4057f
commit 70d6977ccc
No known key found for this signature in database
GPG Key ID: A68DF012C3881D62
3 changed files with 16 additions and 7 deletions

View File

@ -1,3 +1,4 @@
group :test do group :test do
gem 'rake' gem 'rake'
gem 'charlock_holmes'
end end

View File

@ -2,6 +2,10 @@ task default: %w[test]
task :test do task :test do
Dir["./tests/*.rb"].each do |test_file| Dir["./tests/*.rb"].each do |test_file|
ruby test_file begin
ruby test_file
rescue
puts "FAILED #{test_file}!"
end
end end
end end

View File

@ -1,14 +1,18 @@
#!/usr/bin/env ruby #!/usr/bin/env ruby
require 'charlock_holmes'
$file_count = 0; $file_count = 0;
markdown_files = Dir["./**/*.html.markdown"] markdown_files = Dir["./**/*.html.markdown"]
markdown_files.each do |file| markdown_files.each do |file|
begin begin
file_bin = File.open(file, "rb") contents = File.read(file)
contents = file_bin.read detection = CharlockHolmes::EncodingDetector.detect(contents)
if ! contents.valid_encoding? case detection[:encoding]
puts "#{file} has an invalid encoding! Please save the file in UTF-8!" when 'UTF-8'
else
$file_count = $file_count + 1 $file_count = $file_count + 1
when 'ISO-8859-1'
$file_count = $file_count + 1
else
puts "#{file} was detected as #{detection[:encoding]} encoding! Please save the file in UTF-8!"
end end
rescue Exception => msg rescue Exception => msg
puts msg puts msg
@ -20,6 +24,6 @@ if files_failed != 0
puts "Please resave the file as UTF-8." puts "Please resave the file as UTF-8."
exit 1 exit 1
else else
puts "Success. All #{$file_count} files passed UTF-8 validity checks" puts "Success. All #{$file_count} files Ruby's UTF-8 validity checks. This won't catch most problems."
exit 0 exit 0
end end