11using System . Net . Http . Headers ;
2- using System . Text ;
2+ using System . Runtime . InteropServices ;
3+ using System . Text . RegularExpressions ;
4+ using System . Web ;
35
46namespace DownloadAssistant . Media
57{
68 /// <summary>
7- /// Class to build a file metadata .
9+ /// Provides file metadata extraction and sanitization capabilities for downloaded content .
810 /// </summary>
911 public class FileMetadata
1012 {
1113 private readonly HttpContentHeaders _headers ;
1214 private readonly Uri _uri ;
1315
/// <summary>
/// Header names that <c>CheckAlternativeDispositionHeaders</c> scans, in this order,
/// for a <c>filename=</c> / <c>filename*=</c> parameter.
/// </summary>
private static readonly string[] DispositionHeaders = { /* IETF Standard */ "Content-Disposition", "X-Content-Disposition" };

/// <summary>
/// Header names whose raw value is treated as a filename by <c>ExtractFromCustomHeaders</c>.
/// Entries are tried in array order; the first non-blank value wins.
/// </summary>
private static readonly string[] FilenameHeaders = {
    /* Cloud Provider Headers */ "X-Amz-Meta-Filename", "x-ms-meta-Filename", "X-Google-Filename",
    /* Framework Headers */ "X-Django-FileName", "X-File-Key",
    /* CDN/Proxy Headers */ "X-Original-Filename", "X-Source-Filename",
    /* Common Industry Headers */ "X-Filename", "X-File-Name", "X-Object-Name"
};

/// <summary>
/// Header names whose value is parsed as an absolute URI by <c>ExtractFromRedirectHeaders</c>;
/// the last path component of that URI becomes the filename candidate.
/// </summary>
// NOTE(review): these names are looked up on the content-header collection; confirm that
// response-level headers such as Location are actually visible there.
private static readonly string[] RedirectHeaders = {
    /* RFC 7231 Standard */ "Location",
    /* Nginx */ "X-Accel-Redirect",
    /* Apache */ "X-Sendfile"
};

/// <summary>
/// Query-string parameter names inspected by <c>ExtractFromQueryParameters</c>.
/// Earlier entries take priority over later ones.
/// </summary>
private static readonly string[] QueryParamPriority =
{
    /* Standard Parameters*/ "filename", "file", "name", "key",
    /* Platform-Specific Parameters */
    "file_name", "file-name", "googleStorageFileName", "azureFileName", "amzFileName"
};
37+
/// <summary>
/// Maps content-related header names (compared case-insensitively) to a function that turns
/// the raw header value into a filename candidate. The consumer,
/// <c>ExtractFromContentHeaders</c>, percent-decodes and trims whatever these functions return,
/// so the functions themselves must not decode.
/// </summary>
private static readonly Dictionary<string, Func<string, string>> ContentHeaderMap = new(StringComparer.OrdinalIgnoreCase)
{
    // Content-Location may legally be a relative reference, so new Uri(v) cannot be assumed to
    // succeed. file: results are excluded because some platforms parse a rooted relative path
    // ("/a/b?x") as an implicit file URI and fold the query into the path.
    ["Content-Location"] = v =>
    {
        string path = Uri.TryCreate(v, UriKind.Absolute, out Uri? absolute) && !absolute.IsFile
            ? absolute.AbsolutePath
            : v.Split('?', '#')[0];
        return Path.GetFileName(path);
    },
    ["X-Content-Name"] = v => v,
    // Left encoded on purpose: decoding here as well would decode the value twice.
    ["X-ShareFile-Name"] = v => v
};
44+
/// <summary>
/// Gets the sanitized filename for the downloaded content.
/// Starts as an empty string and can only be assigned from inside this class.
/// </summary>
public string FileName { get; private set; } = string.Empty;
1849
/// <summary>
/// Gets the file extension derived from the content type or the URI.
/// Starts as an empty string and can only be assigned from inside this class.
/// </summary>
public string Extension { get; private set; } = string.Empty;
2354
2455 /// <summary>
25- /// Initializes a new instance of the <see cref=" FileMetadata"/> class.
56+ /// Initializes a new instance of the FileMetadata class.
2657 /// </summary>
27- /// <param name="headers"><see cref="HttpContentHeaders"/> of the response.</param>
28- /// <param name="uri">The URL to the file .</param>
58+ /// <param name="headers">HTTP content headers from the response.</param>
59+ /// <param name="uri">Source URI of the downloaded content .</param>
2960 public FileMetadata ( HttpContentHeaders headers , Uri uri )
3061 {
3162 _headers = headers ;
@@ -35,56 +66,244 @@ public FileMetadata(HttpContentHeaders headers, Uri uri)
3566 }
3667
/// <summary>
/// Determines the file extension: first from the Content-Type media type, then, when that
/// yields nothing, from the path component of the URI.
/// </summary>
private void SetExtension()
{
    string? mediaType = _headers.ContentType?.MediaType;
    string? extension = mediaType != null
        ? MimeTypeMap.GetDefaultExtension(mediaType)
        : null;

    // A media type with no mapped extension must still fall back to the URI,
    // as the previous implementation did.
    if (string.IsNullOrEmpty(extension))
    {
        // AbsolutePath excludes query and fragment, so "file.zip?v=1.2" yields ".zip", not ".2".
        extension = Path.GetExtension(_uri.AbsolutePath);
    }

    Extension = extension ?? string.Empty;
}
4877
/// <summary>
/// Main filename determination workflow. Sources are consulted in order (disposition headers,
/// URI path, alternate sources) and the first one that yields a non-blank name wins; when all
/// of them fail a generated fallback name is used. The result is sanitized before it is stored
/// in <see cref="FileName"/>.
/// </summary>
private void SetFilename()
{
    // A blank (but non-null) candidate must not stop the chain, otherwise later
    // strategies are never consulted.
    static string? NullIfBlank(string? candidate) =>
        string.IsNullOrWhiteSpace(candidate) ? null : candidate;

    string rawName = NullIfBlank(GetFilenameFromHeaders())
        ?? NullIfBlank(GetFilenameFromUri())
        ?? NullIfBlank(GetFilenameFromAlternateSources())
        ?? GenerateFallbackFilename();

    FileName = SanitizeFilename(rawName);
}
90+
/// <summary>
/// Attempts to extract the filename from the parsed Content-Disposition header, preferring the
/// extended <c>filename*</c> parameter over the plain <c>filename</c> parameter, and falls back
/// to scanning the raw disposition headers.
/// </summary>
/// <returns>A non-blank filename, or whatever the raw-header scan returns when the parsed header has none.</returns>
private string? GetFilenameFromHeaders()
{
    ContentDispositionHeaderValue? contentDisposition = _headers.ContentDisposition;
    if (contentDisposition != null)
    {
        // FileNameStar is already decoded by the framework; decoding it again would
        // corrupt names that contain a literal percent sequence.
        string? extended = contentDisposition.FileNameStar?.Trim('"', '\'').Trim();
        if (!string.IsNullOrWhiteSpace(extended))
            return extended;

        // The plain parameter is sometimes percent-encoded by servers, so decode it once.
        string? plain = contentDisposition.FileName?.Trim('"', '\'').Trim();
        if (!string.IsNullOrWhiteSpace(plain))
            return Uri.UnescapeDataString(plain);
    }

    // Nothing usable (or only quotes/whitespace) in the parsed header.
    return CheckAlternativeDispositionHeaders();
}
107+
/// <summary>
/// Attempts to derive the filename from the path of the source URI, ignoring query and fragment.
/// </summary>
/// <returns>The percent-decoded filename, or null when the path does not provide one.</returns>
private string? GetFilenameFromUri()
{
    try
    {
        // GetLeftPart(Path) drops the query string and fragment.
        Uri cleanUri = new(_uri.GetLeftPart(UriPartial.Path));

        string filename = Path.GetFileName(cleanUri.AbsolutePath);
        if (!string.IsNullOrWhiteSpace(filename))
            return Uri.UnescapeDataString(filename);

        string? patternMatch = cleanUri.Segments
            .LastOrDefault(s => s.Contains('.') && !s.EndsWith('/'))
            ?.Trim('/');

        // Segments are still percent-encoded; decode like the other branches do.
        if (!string.IsNullOrWhiteSpace(patternMatch))
            return Uri.UnescapeDataString(patternMatch);

        string decodedFilename = Path.GetFileName(HttpUtility.UrlDecode(cleanUri.AbsoluteUri));
        if (!string.IsNullOrWhiteSpace(decodedFilename))
            return decodedFilename;
    }
    catch (Exception ex) when (ex is InvalidOperationException or UriFormatException or ArgumentException)
    {
        // Best effort: a relative or malformed URI simply provides no filename.
        // Unexpected exception types are no longer hidden.
    }

    return null;
}
66133
/// <summary>
/// Checks the raw disposition headers for filename information. This covers values the
/// typed Content-Disposition property did not expose.
/// </summary>
/// <returns>A non-blank filename, or null if none of the headers provides one.</returns>
private string? CheckAlternativeDispositionHeaders()
{
    foreach (string header in DispositionHeaders)
    {
        if (!_headers.TryGetValues(header, out IEnumerable<string>? values)) continue;

        string? value = values.FirstOrDefault();
        if (string.IsNullOrWhiteSpace(value)) continue;

        string? filename = ParseDispositionFilename(value);
        if (filename != null) return filename;
    }
    return null;
}

/// <summary>
/// Extracts the filename from a single raw disposition header value, preferring the
/// extended <c>filename*</c> parameter over the plain <c>filename</c> parameter.
/// </summary>
/// <param name="headerValue">Raw header value, e.g. <c>attachment; filename="a.txt"</c>.</param>
/// <returns>The decoded, non-blank filename, or null when no usable parameter is present.</returns>
private static string? ParseDispositionFilename(string headerValue)
{
    Match extended = Regex.Match(headerValue, @"\bfilename\*\s*=\s*([^;]+)", RegexOptions.IgnoreCase);
    if (extended.Success)
    {
        string raw = extended.Groups[1].Value.Trim().Trim('"').Trim();

        // Extended values look like charset'language'encoded-name; strip that prefix so it
        // does not end up in the filename.
        Match parts = Regex.Match(raw, @"^[^']*'[^']*'(?<name>.*)$");
        string encoded = parts.Success ? parts.Groups["name"].Value : raw.Trim('\'');

        // NOTE(review): the percent-encoded bytes are decoded as UTF-8 regardless of the declared charset.
        string decoded = Uri.UnescapeDataString(encoded).Trim();
        if (decoded.Length > 0) return decoded;
    }

    // A quoted value may legitimately contain ';', so it is matched up to the closing quote.
    Match plain = Regex.Match(
        headerValue,
        @"\bfilename\s*=\s*(?:""(?<quoted>[^""]*)""|(?<token>[^;]+))",
        RegexOptions.IgnoreCase);
    if (!plain.Success) return null;

    string candidate = plain.Groups["quoted"].Success
        ? plain.Groups["quoted"].Value
        : plain.Groups["token"].Value.Trim().Trim('"', '\'');

    candidate = Uri.UnescapeDataString(candidate.Trim());
    return string.IsNullOrWhiteSpace(candidate) ? null : candidate;
}
153+
/// <summary>
/// Coordinates the alternate filename discovery strategies. They run lazily in priority order
/// (query parameters, content headers, redirect headers, custom headers, Content-Type) and the
/// first non-blank result is returned.
/// </summary>
/// <returns>A non-blank filename, or null if no strategy produced one.</returns>
private string? GetFilenameFromAlternateSources()
{
    Func<string?>[] strategies =
    {
        ExtractFromQueryParameters,
        ExtractFromContentHeaders,
        ExtractFromRedirectHeaders,
        ExtractFromCustomHeaders,
        ExtractFromContentType
    };

    foreach (Func<string?> strategy in strategies)
    {
        // Blank results are treated like "not found" so that later strategies still get a chance.
        string? candidate = strategy();
        if (!string.IsNullOrWhiteSpace(candidate))
            return candidate;
    }

    return null;
}
166+
/// <summary>
/// Extracts the filename from content-related headers (Content-Location, etc.) using the
/// per-header extraction functions in <c>ContentHeaderMap</c>.
/// </summary>
/// <returns>A non-blank, percent-decoded filename, or null if not found.</returns>
private string? ExtractFromContentHeaders()
{
    foreach (KeyValuePair<string, Func<string, string>> header in ContentHeaderMap)
    {
        if (!_headers.TryGetValues(header.Key, out IEnumerable<string>? values)) continue;

        string? value = values.FirstOrDefault();
        if (string.IsNullOrWhiteSpace(value)) continue;

        try
        {
            string? extracted = header.Value(value)?.Trim();
            if (string.IsNullOrEmpty(extracted)) continue;

            string candidate = Uri.UnescapeDataString(extracted);
            // A header that yields nothing usable must not hide the remaining headers.
            if (!string.IsNullOrWhiteSpace(candidate))
                return candidate;
        }
        catch (Exception ex) when (ex is UriFormatException or InvalidOperationException or ArgumentException)
        {
            // The header value could not be interpreted (e.g. not a valid URI);
            // move on to the next header instead of failing the whole lookup.
        }
    }
    return null;
}
188+
/// <summary>
/// Looks through the redirect-style headers and takes the last path component of the first
/// one whose first value is an absolute URI with a non-blank file part.
/// </summary>
/// <returns>The percent-decoded filename, or null when no header qualifies.</returns>
private string? ExtractFromRedirectHeaders()
{
    foreach (string headerName in RedirectHeaders)
    {
        bool usable = _headers.TryGetValues(headerName, out IEnumerable<string>? headerValues)
            && headerValues.FirstOrDefault() is string target
            && !string.IsNullOrWhiteSpace(target)
            && Uri.TryCreate(target, UriKind.Absolute, out Uri? redirectUri)
            && !string.IsNullOrWhiteSpace(Path.GetFileName(redirectUri.AbsolutePath));

        if (!usable)
        {
            continue;
        }

        // Re-read the first value; every check above has already passed for it.
        Uri resolved = new(headerValues!.First(), UriKind.Absolute);
        return Uri.UnescapeDataString(Path.GetFileName(resolved.AbsolutePath));
    }

    return null;
}
210+
/// <summary>
/// Scans the custom filename headers (cloud providers, frameworks, CDNs) in priority order
/// and uses the first value of the first header that is not blank.
/// </summary>
/// <returns>The trimmed, percent-decoded header value, or null when none is present.</returns>
private string? ExtractFromCustomHeaders()
{
    string? rawName = FilenameHeaders
        .Select(headerName => _headers.TryGetValues(headerName, out IEnumerable<string>? found)
            ? found.FirstOrDefault()
            : null)
        .FirstOrDefault(candidate => !string.IsNullOrWhiteSpace(candidate));

    return rawName is null
        ? null
        : Uri.UnescapeDataString(rawName.Trim());
}
227+
/// <summary>
/// Analyzes the query string of the source URI for filename hints, checking the parameter
/// names in <c>QueryParamPriority</c> order.
/// </summary>
/// <returns>The last path component of the first usable parameter value, or null if not found.</returns>
private string? ExtractFromQueryParameters()
{
    try
    {
        System.Collections.Specialized.NameValueCollection queryParams = HttpUtility.ParseQueryString(_uri.Query);
        foreach (string param in QueryParamPriority)
        {
            string? value = queryParams[param];
            if (string.IsNullOrWhiteSpace(value)) continue;

            // ParseQueryString has already URL-decoded the value; decoding again would turn a
            // literal '+' into a space and mangle literal percent sequences.
            string candidate = Path.GetFileName(value).Trim();

            // e.g. "?file=dir/" has no file part; keep looking at lower-priority parameters.
            if (candidate.Length > 0)
                return candidate;
        }
    }
    catch (Exception ex) when (ex is InvalidOperationException or ArgumentException)
    {
        // Best effort: a relative URI has no query to inspect.
    }
    return null;
}
249+
/// <summary>
/// Looks for a filename hint in the Content-Type header: first a <c>name</c> or
/// <c>filename</c> parameter, then the <c>X-Content-Type-Filename</c> header.
/// Nothing is inspected when the response carries no media type.
/// </summary>
/// <returns>The percent-decoded hint, or null when none is available.</returns>
private string? ExtractFromContentType()
{
    MediaTypeHeaderValue? contentType = _headers.ContentType;
    if (contentType?.MediaType == null)
    {
        return null;
    }

    foreach (NameValueHeaderValue parameter in contentType.Parameters)
    {
        bool isNameLike =
            string.Equals(parameter.Name, "name", StringComparison.OrdinalIgnoreCase) ||
            string.Equals(parameter.Name, "filename", StringComparison.OrdinalIgnoreCase);
        if (!isNameLike)
        {
            continue;
        }

        // Only the first name-like parameter counts, even if its value is blank.
        if (!string.IsNullOrWhiteSpace(parameter.Value))
        {
            return Uri.UnescapeDataString(parameter.Value.Trim('"', '\''));
        }

        break;
    }

    if (_headers.TryGetValues("X-Content-Type-Filename", out IEnumerable<string>? fallbackValues))
    {
        string? fallback = fallbackValues.FirstOrDefault();
        if (!string.IsNullOrWhiteSpace(fallback))
        {
            return Uri.UnescapeDataString(fallback.Trim());
        }
    }

    return null;
}
271+
/// <summary>
/// Generates a fallback filename from the current local time and the host of the source URI.
/// </summary>
/// <returns>A generated filename in the format "download_yyyyMMdd-HHmmss_host".</returns>
private string GenerateFallbackFilename()
{
    // The invariant culture keeps the timestamp in Gregorian, ASCII-digit form
    // regardless of the thread's current culture or calendar.
    string timestamp = DateTime.Now.ToString("yyyyMMdd-HHmmss", System.Globalization.CultureInfo.InvariantCulture);
    string hostPart = RemoveInvalidFileNameChars(_uri.Host);
    return $"download_{timestamp}_{hostPart}";
}
282+
/// <summary>
/// Sanitizes and normalizes a filename: strips characters the file system rejects, collapses
/// runs of dots, rejects names that are empty or consist of a single dot, and limits the
/// length while keeping the extension where possible.
/// </summary>
/// <param name="fileName">Raw filename input.</param>
/// <returns>A safe filename no longer than the OS-specific limit (80 on Windows, 255 elsewhere).</returns>
private string SanitizeFilename(string fileName)
{
    string cleanName = RemoveInvalidFileNameChars(fileName).Trim();

    // A single Replace pass turns "..." into "..", so repeat until no ".." is left.
    while (cleanName.Contains(".."))
        cleanName = cleanName.Replace("..", ".");

    cleanName = Path.GetFileName(cleanName).Trim();

    // "" and "." are not usable as file names; use the generated name instead.
    // It flows through the length limit below like any other name.
    if (cleanName.Length == 0 || cleanName == ".")
        cleanName = GenerateFallbackFilename();

    int maxLength = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? 80 : 255;

    if (cleanName.Length <= maxLength) return cleanName;

    string extension = Path.GetExtension(cleanName);
    string baseName = Path.GetFileNameWithoutExtension(cleanName);

    // Shorten the base name first so the extension survives; if the extension alone
    // exceeds the limit, cut the whole name.
    int allowedBaseLength = maxLength - extension.Length;
    return allowedBaseLength <= 0
        ? cleanName[..maxLength]
        : $"{baseName[..allowedBaseLength]}{extension}";
}
89308
90309 /// <summary>
@@ -101,17 +320,41 @@ private static string ReplaceFirst(string text, string search, string replace)
101320 return text ;
102321 return string . Concat ( text . AsSpan ( 0 , pos ) , replace , text . AsSpan ( pos + search . Length ) ) ;
103322 }
323+
/// <summary>
/// Produces the final filename from a caller-supplied template. An empty template,
/// <c>*</c> or <c>*.*</c> selects the detected name (with the detected extension appended when
/// the name contains no dot). Otherwise the first <c>*.</c> is replaced by the detected base
/// name and the first <c>.*</c> by the detected extension.
/// </summary>
/// <param name="preSetFilename">The preset filename or wildcard template.</param>
/// <returns>The resolved filename.</returns>
public string BuildFilename(string preSetFilename)
{
    if (preSetFilename is "" or "*" or "*.*")
    {
        return FileName.Contains('.') ? FileName : FileName + Extension;
    }

    string resolved = preSetFilename;

    if (resolved.Contains("*."))
    {
        string detectedBase = Path.GetFileNameWithoutExtension(FileName) + ".";
        resolved = ReplaceFirst(resolved, "*.", detectedBase);
    }

    if (resolved.Contains(".*"))
    {
        resolved = ReplaceFirst(resolved, ".*", Extension);
    }

    return resolved;
}
346+
/// <summary>
/// Removes every character that <see cref="Path.GetInvalidFileNameChars"/> reports as invalid
/// from the given string. A null or empty input is returned unchanged.
/// </summary>
/// <param name="input">Original filename.</param>
/// <returns>The input with all invalid filename characters dropped.</returns>
public static string RemoveInvalidFileNameChars(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    char[] forbidden = Path.GetInvalidFileNameChars();
    char[] kept = new char[input.Length];
    int keptCount = 0;

    foreach (char current in input)
    {
        if (Array.IndexOf(forbidden, current) < 0)
        {
            kept[keptCount++] = current;
        }
    }

    return new string(kept, 0, keptCount);
}
116359 }
117- }
360+ }
0 commit comments