@@ -6,7 +6,7 @@ import scrape from 'website-scraper';
66const testDirname = './test/functional/encoding/.tmp' ;
77const mockDirname = './test/functional/encoding/mocks' ;
88
9- describe ( 'Functional: UTF8 characters are properly encoded/decoded ' , ( ) => {
9+ describe ( 'Functional: encoding ' , ( ) => {
1010 const options = {
1111 urls : [
1212 'http://example.com/' ,
@@ -26,17 +26,29 @@ describe('Functional: UTF8 characters are properly encoded/decoded', () => {
2626 await fs . rm ( testDirname , { recursive : true , force : true } ) ;
2727 } ) ;
2828
29- beforeEach ( ( ) => {
30- nock ( 'http://example.com/' ) . get ( '/' ) . replyWithFile ( 200 , mockDirname + '/index.html' , { 'content-type' : 'text/html; charset=utf-8' } ) ;
29+ it ( 'should save the page with encoding from http response headers' , async ( ) => {
30+ nock ( 'http://example.com/' ) . get ( '/' ) . replyWithFile ( 200 , mockDirname + '/without-charset.html' , { 'content-type' : 'text/html; charset=utf-8' } ) ;
31+
32+ await scrape ( options ) ;
33+
34+ const scrapedIndex = await fs . readFile ( testDirname + '/index.html' , { encoding : 'utf8' } ) ;
35+ scrapedIndex . should . be . containEql ( '<div id="special-characters-korean">저는 7년 동안 한국에서 살았어요.</div>' ) ;
36+ scrapedIndex . should . be . containEql ( '<div id="special-characters-ukrainian">Слава Україні!</div>' ) ;
37+ scrapedIndex . should . be . containEql ( '<div id="special-characters-chinese">加入网站</div>' ) ;
38+ scrapedIndex . should . be . containEql ( '<div id="special-characters-ukrainian">Обладнання та ПЗ</div>' ) ;
39+ scrapedIndex . should . be . containEql ( '<div id="special-characters-french">PAR PASSION DU VÉLO</div>' ) ;
3140 } ) ;
3241
33- it ( 'should save the page in the same data as it was originally' , async ( ) => {
42+ it ( 'should save the page with encoding from html meta tag' , async ( ) => {
43+ nock ( 'http://example.com/' ) . get ( '/' ) . replyWithFile ( 200 , mockDirname + '/with-charset.html' , { 'content-type' : 'text/html' } ) ;
44+
3445 await scrape ( options ) ;
3546
3647 const scrapedIndex = await fs . readFile ( testDirname + '/index.html' , { encoding : 'utf8' } ) ;
3748 scrapedIndex . should . be . containEql ( '<div id="special-characters-korean">저는 7년 동안 한국에서 살았어요.</div>' ) ;
3849 scrapedIndex . should . be . containEql ( '<div id="special-characters-ukrainian">Слава Україні!</div>' ) ;
3950 scrapedIndex . should . be . containEql ( '<div id="special-characters-chinese">加入网站</div>' ) ;
4051 scrapedIndex . should . be . containEql ( '<div id="special-characters-ukrainian">Обладнання та ПЗ</div>' ) ;
52+ scrapedIndex . should . be . containEql ( '<div id="special-characters-french">PAR PASSION DU VÉLO</div>' ) ;
4153 } ) ;
4254} ) ;
0 commit comments