diff --git a/README.md b/README.md index 965b2a8..70ccc18 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ tldjs.parse('https://spark-public.s3.amazonaws.com/dataanalysis/loansData.csv'); // tldExists: true, // publicSuffix: 's3.amazonaws.com', // domain: 'spark-public.s3.amazonaws.com', +// sld: 'spark-public', // subdomain: '' // } @@ -59,6 +60,7 @@ tldjs.parse('gopher://domain.unknown/'); // tldExists: false, // publicSuffix: 'unknown', // domain: 'domain.unknown', +// sld: 'domain', // subdomain: '' // } @@ -69,6 +71,7 @@ tldjs.parse('https://192.168.0.0') // tldExists: false, // publicSuffix: null, // domain: null, +// sld: null, // subdomain: null // } ``` @@ -80,6 +83,7 @@ tldjs.parse('https://192.168.0.0') | `tldExists` | `Boolean` | Is the TLD well-known or not? | | `publicSuffix`| `String` | | | `domain` | `String` | | +| `sld` | `String` | | | `subdomain` | `String` | | @@ -151,6 +155,23 @@ getPublicSuffix('s3.amazonaws.com'); // returns `s3.amazonaws.com` getPublicSuffix('tld.is.unknown'); // returns `unknown` ``` +### getSLD() + +Returns the Second Level Domain (SLD) for a given string — parseable with [`require('url').parse`][]. + +```javascript +const { getSLD } = tldjs; + +getSLD('google.com'); // returns `google` +getSLD('fr.google.com'); // returns `google` +getSLD('google.co.uk'); // returns `google` +getSLD('www.google.com'); // returns `google` +getSLD('mail.google.co.uk'); // returns `google` +getSLD('localhost'); // returns `null` +getSLD('com'); // returns `null` +getSLD('https://user:password@example.co.uk:8080/some/path?and&query#hash'); // returns `example` +``` + ### isValidHostname() Checks if the given string is a valid hostname according to [RFC 1035](https://tools.ietf.org/html/rfc1035). diff --git a/index.js b/index.js index c6371be..e0029bd 100644 --- a/index.js +++ b/index.js @@ -10,6 +10,7 @@ var allRules = Trie.fromJson(require('./rules.json')); var extractHostname = require('./lib/clean-host.js'); var getDomain = require('./lib/domain.js'); var getPublicSuffix = require('./lib/public-suffix.js'); +var getSLD = require('./lib/sld.js'); var getSubdomain = require('./lib/subdomain.js'); var isValidHostname = require('./lib/is-valid.js'); var isIp = require('./lib/is-ip.js'); @@ -23,8 +24,9 @@ var tldExists = require('./lib/tld-exists.js'); var TLD_EXISTS = 1; var PUBLIC_SUFFIX = 2; var DOMAIN = 3; -var SUB_DOMAIN = 4; -var ALL = 5; +var SLD = 4; +var SUB_DOMAIN = 5; +var ALL = 6; /** * @typedef {object} FactoryOptions @@ -42,6 +44,7 @@ var ALL = 5; * @property {(url: string) => boolean} tldExists * @property {(url: string) => string} getPublicSuffix * @property {(url: string) => string|null} getDomain + * @property {(url: string) => string|null} getSLD * @property {(url: string) => string} getSubdomain * @property {(FactoryOptions) => tldjs} fromUserSettings */ @@ -54,6 +57,7 @@ var ALL = 5; * @property {boolean} tldExists * @property {string|null} publicSuffix * @property {string|null} domain + * @property {string|null} sld * @property {string|null} subdomain */ @@ -90,6 +94,7 @@ function factory(options) { tldExists: false, publicSuffix: null, domain: null, + sld: null, subdomain: null, }; @@ -124,6 +129,12 @@ function factory(options) { result.domain = getDomain(validHosts, result.publicSuffix, result.hostname); if (step === DOMAIN) { return result; } + // Extract SLD + if (step === ALL || step === SLD) { + result.sld = getSLD(result.hostname, result.publicSuffix); + } + if (step === SLD) { return result; } + // Extract subdomain result.subdomain = getSubdomain(result.hostname, result.domain); @@ -147,7 +158,10 @@ function factory(options) { getDomain: function (url) { return parse(url, DOMAIN).domain; }, - getSubdomain: function (url) { + getSLD: function(url) { + return parse(url, SLD).sld; + }, + getSubdomain: function(url) { return parse(url, SUB_DOMAIN).subdomain; }, fromUserSettings: factory diff --git a/lib/sld.js b/lib/sld.js new file mode 100644 index 0000000..507de70 --- /dev/null +++ b/lib/sld.js @@ -0,0 +1,40 @@ +'use strict'; + + +/** + * Returns the Second Level Domain (SLD) of a hostname string + * + * @api + * @param {string} hostname + * @param {string} publicSuffix - the public suffix of the hostname + * @return {string|null} the SLD string if any, otherwise null. + */ +module.exports = function getSLD(hostname, publicSuffix) { + if (typeof hostname !== 'string' || typeof publicSuffix !== 'string') { + return null; + } + + if (!hostname || !publicSuffix) { + return null; + } + + if (hostname === publicSuffix) { + return null; + } + + if (!hostname.endsWith('.' + publicSuffix)) { + return null; + } + + // extract SLD only + var publicSuffixIndex = hostname.length - publicSuffix.length - 2; + var lastDotBeforeSuffixIndex = hostname.lastIndexOf('.', publicSuffixIndex); + + if (lastDotBeforeSuffixIndex === -1) { + // No dot found, the entire part before suffix is the SLD + return hostname.substring(0, hostname.length - publicSuffix.length - 1); + } else { + // Extract the part between the last dot and the suffix + return hostname.substring(lastDotBeforeSuffixIndex + 1, hostname.length - publicSuffix.length - 1); + } +}; diff --git a/test/tld.js b/test/tld.js index 8d836ce..d4af99a 100644 --- a/test/tld.js +++ b/test/tld.js @@ -7,6 +7,7 @@ var tld = require('../index.js'); // valid hostname. Hence, we only use it internally. var isIp = require('../lib/is-ip.js'); +var getSLD = require('../lib/sld.js'); var parser = require('../lib/parsers/publicsuffix-org.js'); var expect = require('expect.js'); @@ -379,6 +380,34 @@ describe('tld.js', function () { }); }); + describe('getSLD method', function () { + it('should return null for invalid cases', function () { + expect(tld.getSLD('')).to.be(null); + expect(tld.getSLD('192.168.0.1')).to.be(null); + expect(tld.getSLD('localhost')).to.be(null); + expect(tld.getSLD('com')).to.be(null); + expect(tld.getSLD('single')).to.be(null); + }); + + it('should extract SLD from domains', function () { + expect(tld.getSLD('google.com')).to.equal('google'); + expect(tld.getSLD('google.co.uk')).to.equal('google'); + expect(tld.getSLD('www.google.com')).to.equal('google'); + expect(tld.getSLD('mail.google.co.uk')).to.equal('google'); + }); + + it('should handle URLs', function () { + expect(tld.getSLD('http://www.google.com/')).to.equal('google'); + expect(tld.getSLD(' GOOGLE.COM ')).to.equal('google'); + expect(tld.getSLD('google.com.')).to.equal('google'); + }); + + it('should handle edge cases', function () { + expect(getSLD('', 'com')).to.be(null); + expect(getSLD('test.notcom', 'com')).to.be(null); + }); + }); + describe('#parse', function () { it('should handle ipv6 addresses properly', function () { expect(tld.parse('http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]')).to.eql({ @@ -388,6 +417,7 @@ describe('tld.js', function () { tldExists: false, publicSuffix: null, domain: null, + sld: null, subdomain: null }); expect(tld.parse('http://user:pass@[::1]/segment/index.html?query#frag')).to.eql({ @@ -397,6 +427,7 @@ describe('tld.js', function () { tldExists: false, publicSuffix: null, domain: null, + sld: null, subdomain: null }); expect(tld.parse('https://[::1]')).to.eql({ @@ -406,6 +437,7 @@ describe('tld.js', function () { tldExists: false, publicSuffix: null, domain: null, + sld: null, subdomain: null }); expect(tld.parse('http://[1080::8:800:200C:417A]/foo')).to.eql({ @@ -415,6 +447,7 @@ describe('tld.js', function () { tldExists: false, publicSuffix: null, domain: null, + sld: null, subdomain: null }); }); @@ -428,6 +461,7 @@ describe('tld.js', function () { tldExists: false, publicSuffix: null, domain: null, + sld: null, subdomain: null, }); @@ -439,9 +473,34 @@ describe('tld.js', function () { // tldExists: false, // publicSuffix: null, // domain: null, + // sld: null, // subdomain: null, // }); }); + + it('should include SLD in parse results for valid domains', function () { + expect(tld.parse('www.google.com')).to.eql({ + hostname: 'www.google.com', + isValid: true, + isIp: false, + tldExists: true, + publicSuffix: 'com', + domain: 'google.com', + sld: 'google', + subdomain: 'www' + }); + + expect(tld.parse('example.co.uk')).to.eql({ + hostname: 'example.co.uk', + isValid: true, + isIp: false, + tldExists: true, + publicSuffix: 'co.uk', + domain: 'example.co.uk', + sld: 'example', + subdomain: '' + }); + }); }); describe('validHosts', function(){