@@ -22,6 +22,19 @@ class SourceHgWebPlugin extends MantisSourcePlugin {
2222 const PLUGIN_VERSION = '2.0.1 ' ;
2323 const FRAMEWORK_VERSION_REQUIRED = '2.0.0 ' ;
2424
25+ /**
26+ * Various PCRE patterns used to parse HgWeb output when retrieving
27+ * changeset info
28+ * @see commit_changeset()
29+ */
30+ const PATTERN_USER = '(?<id>User) (?<user>[^<>]*)(?(?=(?=<))<(?<email>[^<>]*)>|.*) ' ;
31+ const PATTERN_DATE = '(?<id>Date) (?<date>\d+) (?<tz>-?\d+) ' ;
32+ const PATTERN_REVISION = '(?<id>Node ID|Parent) +(?<rev>[0-9a-f]+) ' ;
33+ const PATTERN_DIFF = 'diff[\s]*-r[\s]([^\s]*)[\s]*-r[\s]([^\s]*)[\s]([^\n]*) ' ;
34+ const PATTERN_BINARY_FILE = 'Binary file[\s]([^\r\n\t\f\v]*)[\s]has changed ' ;
35+ # Don't use '/' as pattern delimiter with this one
36+ const PATTERN_PLUS_MINUS = '\-{3}[\s](/dev/null)?[^\t]*[^\n]*\n\+{3}[\s](/dev/null)?[^\t]*\t[^\n]* ' ;
37+
2538 function register () {
2639 $ this ->name = plugin_lang_get ( 'title ' );
2740 $ this ->description = plugin_lang_get ( 'description ' );
@@ -207,7 +220,7 @@ private function import_commits( $p_repo, $p_uri_base, $p_commit_ids, $p_branch=
207220 $ t_commit_url = $ this ->uri_base ( $ p_repo ) . 'raw-rev/ ' . $ t_commit_id ;
208221 $ t_input = url_get ( $ t_commit_url );
209222
210- if ( false === $ t_input ) {
223+ if ( ! $ t_input ) {
211224 echo "failed. \n" ;
212225 continue ;
213226 }
@@ -224,41 +237,69 @@ private function import_commits( $p_repo, $p_uri_base, $p_commit_ids, $p_branch=
224237 return $ t_changesets ;
225238 }
226239
240+ /**
241+ * Parse changeset data and store it if it does not exist already.
242+ * This assumes a standard Mercurial template for raw changesets. Using a
243+ * customized one may break the parsing logic.
244+ * @param SourceRepo $p_repo Repository
245+ * @param string $p_input Raw changeset data
246+ * @param string $p_branch
247+ * @return array SourceChangeset object, list of parent revisions
248+ */
227249 private function commit_changeset ( $ p_repo , $ p_input , $ p_branch ='' ) {
228- $ t_parents = array ();
229- $ t_message = array ();
230-
231250 $ t_input = explode ( "\n" , $ p_input );
251+ $ i = 0 ;
232252
233- foreach ( $ t_input as $ t_line ) {
234- if ( strpos ( $ t_line , '# ' ) === 0 ) {
235- if ( !isset ( $ t_commit ['revision ' ] ) && preg_match ( '@^# Node ID +([a-f0-9]+)@ ' , $ t_line , $ t_matches ) ) {
236- $ t_commit ['revision ' ] = $ t_matches [1 ];
237- echo 'Processing ' . string_display_line ( $ t_commit [revision] ) . '... ' ;
238- if ( SourceChangeset::exists ( $ p_repo ->id , $ t_commit ['revision ' ] ) ) {
239- echo "already exists. \n" ;
240- return array ( null , array () );
241- }
242- } else if ( !isset ( $ t_commit ['author ' ] ) && preg_match ( '@^# User ([^<>]*)(?(?=(?=<))<([^<>]*)>|.*)@ ' , $ t_line , $ t_matches ) ) {
243- $ t_commit ['author ' ] = trim ($ t_matches [1 ]);
244- $ t_commit ['author_email ' ] = $ t_matches [2 ];
245- } else if ( !isset ( $ t_commit ['date ' ] ) && preg_match ( '@^# Date +(\d+) (-?\d+)@ ' , $ t_line , $ t_matches ) ) {
246- $ t_timestamp_gmt = $ t_matches [1 ] - (int )$ t_matches [2 ];
253+ # Skip changeset header
254+ while ( strpos ( $ t_input [$ i ++], '# HG changeset patch ' ) === false );
255+
256+ # Process changeset metadata
257+ $ t_commit = array ();
258+ $ t_parents = array ();
259+ static $ s_pattern_metadata = '/^# (?: '
260+ . self ::PATTERN_USER . '| '
261+ . self ::PATTERN_DATE . '| '
262+ . self ::PATTERN_REVISION
263+ . ')/J ' ;
264+ while ( true ) {
265+ $ t_match = preg_match ( $ s_pattern_metadata , $ t_input [$ i ], $ t_metadata );
266+ if ( $ t_match == false ) {
267+ # We reached the end of metadata, next line is the commit message
268+ break ;
269+ }
270+ switch ( $ t_metadata ['id ' ] ) {
271+ case 'User ' :
272+ $ t_commit ['author ' ] = isset ( $ t_metadata ['user ' ] ) ? trim ( $ t_metadata ['user ' ] ) : '' ;
273+ $ t_commit ['author_email ' ] = isset ( $ t_metadata ['email ' ] ) ? $ t_metadata ['email ' ] : '' ;
274+ break ;
275+ case 'Date ' :
276+ $ t_timestamp_gmt = $ t_metadata ['date ' ] - (int )$ t_metadata ['tz ' ];
247277 $ t_commit ['date ' ] = gmdate ( 'Y-m-d H:i:s ' , $ t_timestamp_gmt );
248- } else if ( !isset ( $ t_commit ['parent ' ] ) && preg_match ( '@^# Parent +([a-f0-9]+)@ ' , $ t_line , $ t_matches ) ) {
249- $ t_parents [] = $ t_matches [1 ];
250- $ t_commit ['parent ' ] = $ t_matches [1 ];
251- }
252- } else if ( isset ( $ t_commit ['revision ' ] ) ) {
253- if ( preg_match ( '@^diff @ ' , $ t_line , $ t_matches ) ) {
254278 break ;
255- }
256- $ t_message [] = $ t_line ;
279+ case 'Node ID ' :
280+ $ t_commit ['revision ' ] = $ t_metadata ['rev ' ];
281+ break ;
282+ case 'Parent ' :
283+ $ t_parents [] = $ t_commit ['parent ' ] = $ t_metadata ['rev ' ];
284+ break ;
257285 }
286+ $ i ++;
258287 }
259288
260- if ( !SourceChangeset::exists ( $ p_repo ->id , $ t_commit ['revision ' ] ) ) {
261- $ t_commit ['message ' ] = implode ( "\n" , $ t_message );
289+ if ( !SourceChangeset::exists ( $ p_repo ->id , $ t_commit ['revision ' ] ) ) {
290+ # Read commit message
291+ $ t_message = '' ;
292+ while ( $ i < count ( $ t_input ) ) {
293+ $ t_match = preg_match (
294+ '/^ ' . self ::PATTERN_DIFF . '/ ' ,
295+ $ t_input [$ i ]
296+ );
297+ if ( $ t_match ) {
298+ break ;
299+ }
300+ $ t_message .= $ t_input [$ i ++] . "\n" ;
301+ }
302+ $ t_commit ['message ' ] = trim ( $ t_message );
262303
263304 $ t_changeset = new SourceChangeset ( $ p_repo ->id , $ t_commit ['revision ' ],
264305 $ p_branch , $ t_commit ['date ' ], $ t_commit ['author ' ],
@@ -268,7 +309,12 @@ private function commit_changeset( $p_repo, $p_input, $p_branch='' ) {
268309
269310 $ t_changeset ->author_email = empty ($ t_commit ['author_email ' ])? '' : $ t_commit ['author_email ' ];
270311
271- preg_match_all ('#diff[\s]*-r[\s]([^\s]*)[\s]*-r[\s]([^\s]*)[\s]([^\n]*)\n(Binary file[\s]([^\r\n\t\f\v]*)[\s]has changed|\-{3}[\s](/dev/null)?[^\t]*[^\n]*\n\+{3}[\s](/dev/null)?[^\t]*\t[^\n]*)#u ' , $ p_input , $ t_matches , PREG_SET_ORDER );
312+ static $ s_pattern_diff = '# '
313+ . self ::PATTERN_DIFF . '\n( '
314+ . self ::PATTERN_BINARY_FILE . '| '
315+ . self ::PATTERN_PLUS_MINUS
316+ . ')#u ' ;
317+ preg_match_all ( $ s_pattern_diff , $ p_input , $ t_matches , PREG_SET_ORDER );
272318
273319 $ t_commit ['files ' ] = array ();
274320
@@ -278,21 +324,21 @@ private function commit_changeset( $p_repo, $p_input, $p_branch='' ) {
278324 $ t_file ['revision ' ] = $ t_commit ['revision ' ];
279325
280326 if (!empty ($ t_file_matches [3 ])) {
281- if (! empty ($ t_file_matches [5 ])) {
282- $ t_file ['action ' ] = 'bin ' ;
327+ if ( empty ($ t_file_matches [5 ]) && empty ( $ t_file_matches [ 6 ]) && empty ( $ t_file_matches [ 7 ])) {
328+ $ t_file ['action ' ] = 'mod ' ;
283329 }
284- else if ( " /dev/null " == $ t_file_matches [7 ] ) {
285- $ t_file ['action ' ] = 'rm ' ;
330+ else if (! empty ( $ t_file_matches [5 ]) ) {
331+ $ t_file ['action ' ] = 'bin ' ;
286332 }
287333 else if ("/dev/null " == $ t_file_matches [6 ]) {
288334 $ t_file ['action ' ] = 'add ' ;
289335 }
336+ else if ("/dev/null " == $ t_file_matches [7 ]) {
337+ $ t_file ['action ' ] = 'rm ' ;
338+ }
290339 else if ("/dev/null " == $ t_file_matches [7 ] && "/dev/null " == $ t_file_matches [6 ]) {
291340 $ t_file ['action ' ] = 'n/a ' ;
292341 }
293- else if (empty ($ t_file_matches [5 ]) && empty ($ t_file_matches [6 ]) && empty ($ t_file_matches [7 ])) {
294- $ t_file ['action ' ] = 'mod ' ;
295- }
296342 }
297343 $ t_commit ['files ' ][] = $ t_file ;
298344 }
0 commit comments