mirror of
https://github.com/rspeer/wordfreq.git
synced 2024-12-26 02:28:50 +00:00
21 lines
521 B
Python
21 lines
521 B
Python
|
from wordfreq_builder.word_counts import URL_RE
|
||
|
from nose.tools import eq_
|
||
|
|
||
|
|
||
|
def check_url(url):
    """Assert that URL_RE matches the whole of `url`.

    The pattern is applied at the start of the string; the check fails if
    there is no match, or if the matched span is anything other than
    ``(0, len(url))``.
    """
    found = URL_RE.match(url)
    assert found
    matched_span = found.span()
    eq_(matched_span, (0, len(url)))
|
||
|
|
||
|
|
||
|
def test_url_re():
    """Generator test exercising URL_RE on accepted and rejected inputs.

    Yields one ``(check_url, url)`` pair per URL that must be matched in
    full, then asserts that an ``ftp://`` address is not matched.
    """
    accepted_urls = (
        # URLs like this are all over the Arabic Wikipedia. Here's one with
        # the student ID blanked out.
        'http://www.ju.edu.jo/alumnicard/0000000.aspx',

        'https://example.com/űnicode.html',
        'http://☃.net',
    )
    for candidate in accepted_urls:
        yield check_url, candidate

    # This runs only once the consumer resumes the generator after the
    # final yield.
    rejected = URL_RE.match('ftp://127.0.0.1')
    assert not rejected
|
||
|
|