1+ import tlds from "tlds" ;
2+
13/**
24 * Interface representing the components of a URL.
35 * @interface IURLComponents
4- * @property {string } protocol - The URL protocol (e.g., 'http', 'https')
6+ * @property {string } protocol - The URL protocol (e.g., 'http', 'https'), empty if protocol is not present
57 * @property {string } subdomain - The subdomain part of the URL (e.g., 'blog' in 'blog.example.com')
68 * @property {string } rootDomain - The root domain name (e.g., 'example' in 'blog.example.com')
79 * @property {string } tld - The top-level domain (e.g., 'com', 'org')
@@ -18,36 +20,65 @@ export interface IURLComponents {
1820}
1921
2022/**
21- * Extracts components from a URL object.
23+ * Extracts components from a URL object or string .
2224 *
23- * @param {URL } url - The URL object to extract components from
25+ * @param {URL | string } url - The URL object or string to extract components from
2426 * @returns {IURLComponents | undefined } URL components or undefined if invalid
2527 *
2628 * @example
29+ * // With URL object
2730 * const url = new URL('https://blog.example.com/posts');
2831 * extractURLComponents(url);
32+ *
33+ * // With string
34+ * extractURLComponents('blog.example.com/posts');
35+ *
36+ * // Example output:
2937 * // {
30- * // protocol: 'https',
38+ * // protocol: 'https', // empty string if protocol is not present
3139 * // subdomain: 'blog',
3240 * // rootDomain: 'example',
3341 * // tld: 'com',
3442 * // path: 'posts',
35- * // full: URL {} // The original URL object
43+ * // full: URL {} // The parsed URL object
3644 * // }
3745 */
3846
39- export function extractURLComponents ( url : URL ) : IURLComponents | undefined {
47+ export function extractURLComponents ( url : URL | string ) : IURLComponents | undefined {
48+ if ( ! url ) return undefined ;
49+
50+ let cleanedUrl : URL ;
51+ let wasProtocolAdded = false ;
52+
4053 try {
41- const protocol = url . protocol . slice ( 0 , - 1 ) ;
42- const pathname = url . pathname . replace ( / ^ \/ + / , "" ) . replace ( / \/ { 2 , } / g, "/" ) ;
43- const path = pathname + url . search + url . hash ;
44- const hostnameParts = url . hostname . split ( "." ) ;
54+ if ( typeof url === "string" ) {
55+ if ( url . trim ( ) === "" ) return undefined ;
56+
57+ // Check for valid protocol pattern: some characters followed by ://
58+ if ( / ^ [ a - z A - Z ] + : \/ \/ / . test ( url ) ) {
59+ cleanedUrl = new URL ( url ) ;
60+ } else if ( hasValidTLD ( url ) || url . includes ( "localhost" ) ) {
61+ wasProtocolAdded = true ;
62+ cleanedUrl = new URL ( `http://${ url } ` ) ;
63+ } else {
64+ return undefined ;
65+ }
66+ } else {
67+ cleanedUrl = url ;
68+ }
69+
70+ const protocol = cleanedUrl . protocol . slice ( 0 , - 1 ) ;
71+ const pathname = cleanedUrl . pathname . replace ( / ^ \/ + / , "" ) . replace ( / \/ { 2 , } / g, "/" ) ;
72+ const path = pathname + cleanedUrl . search + cleanedUrl . hash ;
73+ const hostnameParts = cleanedUrl . hostname . split ( "." ) ;
4574
4675 let subdomain = "" ;
4776 let rootDomain = "" ;
4877 let tld = "" ;
4978
50- if ( hostnameParts . length >= 2 ) {
79+ if ( hostnameParts . length === 1 ) {
80+ rootDomain = hostnameParts [ 0 ] ; // For cases like 'localhost'
81+ } else if ( hostnameParts . length >= 2 ) {
5182 tld = hostnameParts [ hostnameParts . length - 1 ] ;
5283 rootDomain = hostnameParts [ hostnameParts . length - 2 ] ;
5384
@@ -57,19 +88,90 @@ export function extractURLComponents(url: URL): IURLComponents | undefined {
5788 }
5889
5990 return {
60- protocol,
91+ protocol : wasProtocolAdded ? "" : protocol ,
6192 subdomain,
6293 rootDomain,
6394 tld,
6495 path,
65- full : url ,
96+ full : cleanedUrl ,
6697 } ;
6798 } catch ( error ) {
68- console . error ( `Error extracting URL components: ${ url . href } ` , error ) ;
99+ console . error ( `Error extracting URL components: ${ url ?. toString ( ) || url } ` , error ) ;
69100 return undefined ;
70101 }
71102}
72103
/**
 * Checks whether the host portion of a protocol-less URL string ends in a known TLD.
 *
 * @param {string} urlString - Host-like string, optionally followed by a port, path, query or hash
 * @returns {boolean} True if the host has at least two non-empty labels and the last one is in the `tlds` list
 *
 * @description
 * The function performs the following steps:
 * 1. Trims surrounding whitespace.
 * 2. Isolates the hostname by cutting at the first '/', '?', '#' or ':'
 *    (start of the path, query, hash fragment or port respectively).
 * 3. Rejects hostnames with fewer than two labels or with any empty label
 *    (leading dot, trailing dot, or consecutive dots).
 * 4. Compares the last label, lower-cased, against the list of known TLDs.
 *
 * @example
 * // Valid cases
 * hasValidTLD('example.com')         // returns true
 * hasValidTLD('sub.example.com')     // returns true
 * hasValidTLD('example.com/path.')   // returns true (only the hostname is inspected)
 * hasValidTLD('example.com:8080')    // returns true (port is stripped)
 * hasValidTLD('example.com?query=1') // returns true (query is stripped)
 * hasValidTLD('example.com#hash')    // returns true (hash is stripped)
 *
 * // Invalid cases
 * hasValidTLD('')                // returns false (empty string)
 * hasValidTLD('.example.com')    // returns false (starts with dot)
 * hasValidTLD('example.com.')    // returns false (ends with dot)
 * hasValidTLD('example..com')    // returns false (empty label)
 * hasValidTLD('example.invalid') // returns false (invalid TLD)
 * hasValidTLD('localhost')       // returns false (no TLD)
 */
function hasValidTLD(urlString: string): boolean {
  if (!urlString) {
    return false;
  }

  // Everything before the first '/', '?', '#' or ':' is the hostname; a single
  // cut at the earliest delimiter is equivalent to stripping each part in turn.
  const hostname = urlString.trim().split(/[\/?#:]/, 1)[0] ?? "";

  const labels = hostname.split(".");

  // Dot rules are applied to the hostname only, so a trailing '.' in the path
  // or query does not invalidate an otherwise well-formed host.
  if (labels.length < 2 || labels.some((label) => label === "")) {
    return false;
  }

  const potentialTLD = (labels[labels.length - 1] ?? "").toLowerCase();
  return tlds.includes(potentialTLD);
}
174+
/**
 * Checks if a string is a valid URL.
 *
 * Accepted forms:
 * - a bare `localhost`, optionally followed by a port, path, query or hash;
 * - a string with an alphabetic scheme followed by `://` that the URL parser
 *   accepts and whose hostname is non-empty with no empty labels;
 * - a protocol-less host whose last label is a known TLD.
 *
 * @param {string} urlString - The string to validate
 * @returns {boolean} True if the string is considered a valid URL, false otherwise
 *
 * @example
 * // Valid URLs
 * isUrlValid('https://example.com')     // returns true
 * isUrlValid('http://example.com')      // returns true
 * isUrlValid('https://sub.example.com') // returns true
 * isUrlValid('google.com')              // returns true
 * isUrlValid('localhost:3000')          // returns true
 *
 * // Invalid URLs
 * isUrlValid('not-a-url')        // returns false
 * isUrlValid('https://invalid.') // returns false (hostname ends with a dot)
 * isUrlValid('example.invalid')  // returns false (invalid TLD)
 * isUrlValid('localhosting')     // returns false (not localhost, no TLD)
 * isUrlValid('://example.com')   // returns false (malformed protocol)
 */
export function isUrlValid(urlString: string): boolean {
  // Basic input validation
  if (!urlString) return false;

  const candidate = urlString.trim();
  if (candidate === "") return false;

  // Handle localhost separately: it has no TLD, so it can never pass the TLD check.
  if (candidate.startsWith("localhost")) {
    try {
      // Only an exact "localhost" host takes the shortcut; strings that merely
      // begin with those characters fall through to the regular checks below.
      if (new URL(`http://${candidate}`).hostname === "localhost") return true;
    } catch {
      return false;
    }
  }

  // Check for valid protocol format if protocol is present
  if (candidate.includes("://")) {
    // Reject invalid protocol formats (e.g. "://example.com")
    if (!/^[a-zA-Z]+:\/\//.test(candidate)) return false;

    try {
      const { hostname } = new URL(candidate);
      // A usable hostname is non-empty and has no empty labels, which rules out
      // ".com", "invalid." and "a..b" alike.
      return hostname !== "" && hostname.split(".").every((label) => label !== "");
    } catch {
      return false;
    }
  }

  return hasValidTLD(candidate);
}
0 commit comments