diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..df3eb51
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+addopts = --doctest-modules
diff --git a/tests/test_french_and_related.py b/tests/test_french_and_related.py
index c347213..58b2d88 100644
--- a/tests/test_french_and_related.py
+++ b/tests/test_french_and_related.py
@@ -1,40 +1,32 @@
-from nose.tools import eq_, assert_almost_equal
 from wordfreq import tokenize, word_frequency
 
 
 def test_apostrophes():
     # Test that we handle apostrophes in French reasonably.
-    eq_(tokenize("qu'un", 'fr'), ['qu', 'un'])
-    eq_(tokenize("qu'un", 'fr', include_punctuation=True),
-        ["qu'", "un"])
-    eq_(tokenize("langues d'oïl", 'fr'),
-        ['langues', "d", 'oïl'])
-    eq_(tokenize("langues d'oïl", 'fr', include_punctuation=True),
-        ['langues', "d'", 'oïl'])
-    eq_(tokenize("l'heure", 'fr'),
-        ['l', 'heure'])
-    eq_(tokenize("l'heure", 'fr', include_punctuation=True),
-        ["l'", 'heure'])
-    eq_(tokenize("L'Hôpital", 'fr', include_punctuation=True),
-        ["l'", 'hôpital'])
-    eq_(tokenize("aujourd'hui", 'fr'), ["aujourd'hui"])
-    eq_(tokenize("This isn't French", 'en'),
-        ['this', "isn't", 'french'])
+    assert tokenize("qu'un", 'fr') == ['qu', 'un']
+    assert tokenize("qu'un", 'fr', include_punctuation=True) == ["qu'", "un"]
+    assert tokenize("langues d'oïl", 'fr') == ['langues', "d", 'oïl']
+    assert tokenize("langues d'oïl", 'fr', include_punctuation=True) == ['langues', "d'", 'oïl']
+    assert tokenize("l'heure", 'fr') == ['l', 'heure']
+    assert tokenize("l'heure", 'fr', include_punctuation=True) == ["l'", 'heure']
+    assert tokenize("L'Hôpital", 'fr', include_punctuation=True) == ["l'", 'hôpital']
+    assert tokenize("aujourd'hui", 'fr') == ["aujourd'hui"]
+    assert tokenize("This isn't French", 'en') == ['this', "isn't", 'french']
 
 
 def test_catastrophes():
     # More apostrophes, but this time they're in Catalan, and there's other
     # mid-word punctuation going on too.
-    eq_(tokenize("M'acabo d'instal·lar.", 'ca'),
-        ['m', 'acabo', 'd', 'instal·lar'])
-    eq_(tokenize("M'acabo d'instal·lar.", 'ca', include_punctuation=True),
-        ["m'", 'acabo', "d'", 'instal·lar', '.'])
+    assert tokenize("M'acabo d'instal·lar.", 'ca') == ['m', 'acabo', 'd', 'instal·lar']
+    assert (
+        tokenize("M'acabo d'instal·lar.", 'ca', include_punctuation=True) ==
+        ["m'", 'acabo', "d'", 'instal·lar', '.']
+    )
 
 
 def test_alternate_codes():
     # Try over-long language codes for French and Catalan
-    eq_(tokenize("qu'un", 'fra'), ['qu', 'un'])
-    eq_(tokenize("qu'un", 'fre'), ['qu', 'un'])
-    eq_(tokenize("M'acabo d'instal·lar.", 'cat'),
-        ['m', 'acabo', 'd', 'instal·lar'])
+    assert tokenize("qu'un", 'fra') == ['qu', 'un']
+    assert tokenize("qu'un", 'fre') == ['qu', 'un']
+    assert tokenize("M'acabo d'instal·lar.", 'cat') == ['m', 'acabo', 'd', 'instal·lar']
diff --git a/tests/test_japanese.py b/tests/test_japanese.py
index 1cd1efa..5e977cf 100644
--- a/tests/test_japanese.py
+++ b/tests/test_japanese.py
@@ -1,10 +1,9 @@
-from nose.tools import eq_, assert_almost_equal
 from wordfreq import tokenize, simple_tokenize, word_frequency
+import pytest
 
 
 def test_tokens():
-    eq_(tokenize('おはようございます', 'ja'),
-        ['おはよう', 'ござい', 'ます'])
+    assert tokenize('おはようございます', 'ja') == ['おはよう', 'ござい', 'ます']
 
 
 def test_simple_tokenize():
@@ -19,31 +18,29 @@
     # We used to try to infer word boundaries between hiragana and katakana,
     # but this leads to edge cases that are unsolvable without a dictionary.
     ja_text = 'ひらがなカタカナromaji'
-    eq_(
-        simple_tokenize(ja_text),
-        ['ひらがなカタカナ', 'romaji']
-    )
+    assert simple_tokenize(ja_text) == ['ひらがなカタカナ', 'romaji']
+
 
     # An example that would be multiple tokens if tokenized as 'ja' via MeCab,
     # but sticks together in simple_tokenize
-    eq_(simple_tokenize('おはようございます'), ['おはようございます'])
+    assert simple_tokenize('おはようございます') == ['おはようございます']
 
     # Names that use the weird possessive marker ヶ, which is technically a
     # katakana even though it's being used like a kanji, stay together as one
     # token
-    eq_(simple_tokenize("犬ヶ島"), ["犬ヶ島"])
+    assert simple_tokenize("犬ヶ島") == ["犬ヶ島"]
 
     # The word in ConceptNet that made me notice that simple_tokenize used
     # to have a problem with the character 々
-    eq_(simple_tokenize("晴々しい"), ["晴々しい"])
+    assert simple_tokenize("晴々しい") == ["晴々しい"]
 
     # Explicit word separators are still token boundaries, such as the dot
     # between "toner" and "cartridge" in "toner cartridge"
-    eq_(simple_tokenize("トナー・カートリッジ"), ["トナー", "カートリッジ"])
+    assert simple_tokenize("トナー・カートリッジ") == ["トナー", "カートリッジ"]
 
     # This word has multiple weird characters that aren't quite kanji in it,
     # and is in the dictionary
-    eq_(simple_tokenize("見ヶ〆料"), ["見ヶ〆料"])
+    assert simple_tokenize("見ヶ〆料") == ["見ヶ〆料"]
@@ -52,12 +49,11 @@ def test_combination():
     gozai_freq = word_frequency('ござい', 'ja')
     masu_freq = word_frequency('ます', 'ja')
 
-    assert_almost_equal(
-        word_frequency('おはようおはよう', 'ja'),
-        ohayou_freq / 2
-    )
-    assert_almost_equal(
-        1.0 / word_frequency('おはようございます', 'ja'),
-        1.0 / ohayou_freq + 1.0 / gozai_freq + 1.0 / masu_freq
+    assert word_frequency('おはようおはよう', 'ja') == pytest.approx(ohayou_freq / 2)
+
+    assert (
+        1.0 / word_frequency('おはようございます', 'ja') ==
+        pytest.approx(1.0 / ohayou_freq + 1.0 / gozai_freq + 1.0 / masu_freq)
     )
+
diff --git a/tests/test_korean.py b/tests/test_korean.py
index bcbf29c..96d599a 100644
--- a/tests/test_korean.py
+++ b/tests/test_korean.py
@@ -1,22 +1,18 @@
-from nose.tools import eq_, assert_almost_equal
 from wordfreq import tokenize, word_frequency
+import pytest
 
 
 def test_tokens():
-    eq_(tokenize('감사합니다', 'ko'),
-        ['감사', '합니다'])
+    assert tokenize('감사합니다', 'ko') == ['감사', '합니다']
 
 
 def test_combination():
     gamsa_freq = word_frequency('감사', 'ko')
     habnida_freq = word_frequency('합니다', 'ko')
 
-    assert_almost_equal(
-        word_frequency('감사감사', 'ko'),
-        gamsa_freq / 2
-    )
-    assert_almost_equal(
-        1.0 / word_frequency('감사합니다', 'ko'),
-        1.0 / gamsa_freq + 1.0 / habnida_freq
+    assert word_frequency('감사감사', 'ko') == pytest.approx(gamsa_freq / 2)
+    assert (
+        1.0 / word_frequency('감사합니다', 'ko') ==
+        pytest.approx(1.0 / gamsa_freq + 1.0 / habnida_freq)
     )
diff --git a/tests/test_transliteration.py b/tests/test_transliteration.py
index d7e4455..08486b0 100644
--- a/tests/test_transliteration.py
+++ b/tests/test_transliteration.py
@@ -1,15 +1,18 @@
-from nose.tools import eq_
 from wordfreq import tokenize
 from wordfreq.preprocess import preprocess_text
 
 
 def test_transliteration():
     # "Well, there's a lot of things you do not understand."
-    # (from somewhere in OpenSubtitles)
-    eq_(tokenize("Па, има ту много ствари које не схваташ.", 'sr'),
-        ['pa', 'ima', 'tu', 'mnogo', 'stvari', 'koje', 'ne', 'shvataš'])
-    eq_(tokenize("Pa, ima tu mnogo stvari koje ne shvataš.", 'sr'),
-        ['pa', 'ima', 'tu', 'mnogo', 'stvari', 'koje', 'ne', 'shvataš'])
+    # (from somewhere in OpenSubtitles)
+    assert (
+        tokenize("Па, има ту много ствари које не схваташ.", 'sr') ==
+        ['pa', 'ima', 'tu', 'mnogo', 'stvari', 'koje', 'ne', 'shvataš']
+    )
+    assert (
+        tokenize("Pa, ima tu mnogo stvari koje ne shvataš.", 'sr') ==
+        ['pa', 'ima', 'tu', 'mnogo', 'stvari', 'koje', 'ne', 'shvataš']
+    )
 
     # I don't have examples of complete sentences in Azerbaijani that are
     # naturally in Cyrillic, because it turns out everyone writes Azerbaijani
@@ -17,14 +20,14 @@
     # So here are some individual words.
 
     # 'library' in Azerbaijani Cyrillic
-    eq_(preprocess_text('китабхана', 'az'), 'kitabxana')
-    eq_(preprocess_text('КИТАБХАНА', 'az'), 'kitabxana')
-    eq_(preprocess_text('KİTABXANA', 'az'), 'kitabxana')
+    assert preprocess_text('китабхана', 'az') == 'kitabxana'
+    assert preprocess_text('КИТАБХАНА', 'az') == 'kitabxana'
+    assert preprocess_text('KİTABXANA', 'az') == 'kitabxana'
 
     # 'scream' in Azerbaijani Cyrillic
-    eq_(preprocess_text('бағырты', 'az'), 'bağırtı')
-    eq_(preprocess_text('БАҒЫРТЫ', 'az'), 'bağırtı')
-    eq_(preprocess_text('BAĞIRTI', 'az'), 'bağırtı')
+    assert preprocess_text('бағырты', 'az') == 'bağırtı'
+    assert preprocess_text('БАҒЫРТЫ', 'az') == 'bağırtı'
+    assert preprocess_text('BAĞIRTI', 'az') == 'bağırtı'
 
 
 def test_actually_russian():
@@ -35,15 +38,13 @@ def test_actually_russian():
     # We make sure to handle this case so we don't end up with a mixed-script
     # word like "pacanы".
 
-    eq_(tokenize("сто из ста, пацаны!", 'sr'),
-        ['sto', 'iz', 'sta', 'pacany'])
-
-    eq_(tokenize("культуры", 'sr'), ["kul'tury"])
+    assert tokenize("сто из ста, пацаны!", 'sr') == ['sto', 'iz', 'sta', 'pacany']
+    assert tokenize("культуры", 'sr') == ["kul'tury"]
 
 
 def test_alternate_codes():
     # Try language codes for Serbo-Croatian that have been split, and now
     # are canonically mapped to Serbian
-    eq_(tokenize("культуры", 'sh'), ["kul'tury"])
-    eq_(tokenize("культуры", 'hbs'), ["kul'tury"])
+    assert tokenize("культуры", 'sh') == ["kul'tury"]
+    assert tokenize("культуры", 'hbs') == ["kul'tury"]
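Note on the conversion pattern used throughout this diff: `eq_(a, b)` maps directly to `assert a == b`, while `assert_almost_equal(a, b)` is replaced by `assert a == pytest.approx(b)`. The two float checks are not identical: unittest-style `assert_almost_equal` rounds the difference to 7 decimal places by default, while `pytest.approx` uses a relative tolerance (1e-6 by default, plus a small absolute floor). The sketch below is illustrative only and is not part of the diff; it assumes those documented defaults and uses made-up numbers.

```python
import pytest


def test_tolerance_equivalence_sketch():
    # nose/unittest style: the difference is rounded to 7 decimal places.
    assert round(2.0 / 3.0 - 0.6666667, 7) == 0

    # pytest style: approx compares with a relative tolerance of 1e-6
    # (and an absolute floor of 1e-12), so the tolerance scales with magnitude.
    assert 2.0 / 3.0 == pytest.approx(0.6666667)

    # For tiny values such as word frequencies, an explicit tolerance documents
    # the intent; rel=1e-6 here is an illustrative choice, not a project setting.
    assert 1.5e-9 == pytest.approx(1.5000001e-9, rel=1e-6)
```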