@@ -1360,10 +1360,34 @@ def load_html_string(self, html_string, new_page=True):
1360
1360
If new_page==True, the page will switch to: "data:text/html,"
1361
1361
If new_page==False, will load HTML into the current page. """
1362
1362
1363
+ soup = self .get_beautiful_soup (html_string )
1364
+ found_base = False
1365
+ links = soup .findAll ("link" )
1366
+ href = None
1367
+
1368
+ for link in links :
1369
+ if link .get ("rel" ) == ["canonical" ] and link .get ("href" ):
1370
+ found_base = True
1371
+ href = link .get ("href" )
1372
+ href = self .get_domain_url (href )
1373
+ if found_base and html_string .count ("<head>" ) == 1 and (
1374
+ html_string .count ("<base" ) == 0 ):
1375
+ html_string = html_string .replace (
1376
+ "<head>" , '<head><base href="%s">' % href )
1377
+ elif not found_base :
1378
+ bases = soup .findAll ("base" )
1379
+ for base in bases :
1380
+ if base .get ("href" ):
1381
+ href = base .get ("href" )
1382
+ if href :
1383
+ html_string = html_string .replace (
1384
+ 'base: "."' , 'base: "%s"' % href )
1385
+
1363
1386
soup = self .get_beautiful_soup (html_string )
1364
1387
scripts = soup .findAll ("script" )
1365
1388
for script in scripts :
1366
- html_string = html_string .replace (str (script ), "" )
1389
+ if script .get ("type" ) != "application/json" :
1390
+ html_string = html_string .replace (str (script ), "" )
1367
1391
soup = self .get_beautiful_soup (html_string )
1368
1392
1369
1393
found_head = False
@@ -1413,19 +1437,28 @@ def load_html_string(self, html_string, new_page=True):
1413
1437
1414
1438
for script in scripts :
1415
1439
js_code = script .string
1416
- js_code_lines = js_code .split ('\n ' )
1417
- new_lines = []
1418
- for line in js_code_lines :
1419
- line = line .strip ()
1420
- new_lines .append (line )
1421
- js_code = '\n ' .join (new_lines )
1422
- js_utils .add_js_code (self .driver , js_code )
1440
+ js_src = script .get ("src" )
1441
+ if js_code and script .get ("type" ) != "application/json" :
1442
+ js_code_lines = js_code .split ('\n ' )
1443
+ new_lines = []
1444
+ for line in js_code_lines :
1445
+ line = line .strip ()
1446
+ new_lines .append (line )
1447
+ js_code = '\n ' .join (new_lines )
1448
+ js_utils .add_js_code (self .driver , js_code )
1449
+ elif js_src :
1450
+ js_utils .add_js_link (self .driver , js_src )
1451
+ else :
1452
+ pass
1423
1453
1424
1454
def load_html_file (self , html_file , new_page = True ):
1425
1455
""" Loads a local html file into the browser from a relative file path.
1426
1456
If new_page==True, the page will switch to: "data:text/html,"
1427
1457
If new_page==False, will load HTML into the current page.
1428
1458
Local images and other local src content WILL BE IGNORED. """
1459
+ if self .__looks_like_a_page_url (html_file ):
1460
+ self .open (html_file )
1461
+ return
1429
1462
if len (html_file ) < 6 or not html_file .endswith (".html" ):
1430
1463
raise Exception ('Expecting a ".html" file!' )
1431
1464
abs_path = os .path .abspath ('.' )
0 commit comments