@@ -22,6 +22,19 @@ class SourceHgWebPlugin extends MantisSourcePlugin {
2222 const PLUGIN_VERSION = '1.0.1 ' ;
2323 const FRAMEWORK_VERSION_REQUIRED = '1.3.2 ' ;
2424
25+ /**
26+ * PCRE pattern fragments (written without delimiters) used to parse HgWeb raw changeset output;
27+ * USER, DATE and REVISION each define a group named 'id', so they are combined under the J (duplicate names) modifier
28+ * @see commit_changeset()
29+ */
30+ const PATTERN_USER = '(?<id>User) (?<user>[^<>]*)(?(?=(?=<))<(?<email>[^<>]*)>|.*) ' ; # 'email' is only captured when a <...> part follows the user name
31+ const PATTERN_DATE = '(?<id>Date) (?<date>\d+) (?<tz>-?\d+) ' ; # commit_changeset() subtracts 'tz' from 'date' before formatting with gmdate()
32+ const PATTERN_REVISION = '(?<id>Node ID|Parent) +(?<rev>[0-9a-f]+) ' ; # 'id' distinguishes the changeset's own node from its parent(s)
33+ const PATTERN_DIFF = 'diff[\s]*-r[\s]([^\s]*)[\s]*-r[\s]([^\s]*)[\s]([^\n]*) ' ;
34+ const PATTERN_BINARY_FILE = 'Binary file[\s]([^\r\n\t\f\v]*)[\s]has changed ' ;
35+ # This pattern contains literal '/' characters (/dev/null), so don't use '/' as pattern delimiter with this one
36+ const PATTERN_PLUS_MINUS = '\-{3}[\s](/dev/null)?[^\t]*[^\n]*\n\+{3}[\s](/dev/null)?[^\t]*\t[^\n]* ' ;
37+
2538 function register () {
2639 $ this ->name = plugin_lang_get ( 'title ' );
2740 $ this ->description = plugin_lang_get ( 'description ' );
@@ -231,41 +244,69 @@ private function import_commits( $p_repo, $p_uri_base, $p_commit_ids, $p_branch=
231244 return $ t_changesets ;
232245 }
233246
247+ /**
248+ * Parse changeset data and store it if it does not exist already.
249+ * This assumes a standard Mercurial template for raw changesets. Using a
250+ * customized one may break the parsing logic.
251+ * @param SourceRepo $p_repo Repository
252+ * @param string $p_input Raw changeset data
253+ * @param string $p_branch
254+ * @return array SourceChangeset object, list of parent revisions
255+ */
234256 private function commit_changeset ( $ p_repo , $ p_input , $ p_branch ='' ) {
235- $ t_parents = array ();
236- $ t_message = array ();
237-
238257 $ t_input = explode ( "\n" , $ p_input );
258+ $ i = 0 ;
239259
240- foreach ( $ t_input as $ t_line ) {
241- if ( strpos ( $ t_line , '# ' ) === 0 ) {
242- if ( !isset ( $ t_commit ['revision ' ] ) && preg_match ( '@^# Node ID +([a-f0-9]+)@ ' , $ t_line , $ t_matches ) ) {
243- $ t_commit ['revision ' ] = $ t_matches [1 ];
244- echo 'Processing ' . string_display_line ( $ t_commit ['revision ' ] ) . '... ' ;
245- if ( SourceChangeset::exists ( $ p_repo ->id , $ t_commit ['revision ' ] ) ) {
246- echo "already exists. \n" ;
247- return array ( null , array () );
248- }
249- } else if ( !isset ( $ t_commit ['author ' ] ) && preg_match ( '@^# User ([^<>]*)(?(?=(?=<))<([^<>]*)>|.*)@ ' , $ t_line , $ t_matches ) ) {
250- $ t_commit ['author ' ] = trim ($ t_matches [1 ]);
251- $ t_commit ['author_email ' ] = $ t_matches [2 ];
252- } else if ( !isset ( $ t_commit ['date ' ] ) && preg_match ( '@^# Date +(\d+) (-?\d+)@ ' , $ t_line , $ t_matches ) ) {
253- $ t_timestamp_gmt = $ t_matches [1 ] - (int )$ t_matches [2 ];
260+ # Skip changeset header
261+ while ( strpos ( $ t_input [$ i ++], '# HG changeset patch ' ) === false );
262+
263+ # Process changeset metadata
264+ $ t_commit = array ();
265+ $ t_parents = array ();
266+ static $ s_pattern_metadata = '/^# (?: '
267+ . self ::PATTERN_USER . '| '
268+ . self ::PATTERN_DATE . '| '
269+ . self ::PATTERN_REVISION
270+ . ')/J ' ;
271+ while ( true ) {
272+ $ t_match = preg_match ( $ s_pattern_metadata , $ t_input [$ i ], $ t_metadata );
273+ if ( $ t_match == false ) {
274+ # We reached the end of metadata, next line is the commit message
275+ break ;
276+ }
277+ switch ( $ t_metadata ['id ' ] ) {
278+ case 'User ' :
279+ $ t_commit ['author ' ] = isset ( $ t_metadata ['user ' ] ) ? trim ( $ t_metadata ['user ' ] ) : '' ;
280+ $ t_commit ['author_email ' ] = isset ( $ t_metadata ['email ' ] ) ? $ t_metadata ['email ' ] : '' ;
281+ break ;
282+ case 'Date ' :
283+ $ t_timestamp_gmt = $ t_metadata ['date ' ] - (int )$ t_metadata ['tz ' ];
254284 $ t_commit ['date ' ] = gmdate ( 'Y-m-d H:i:s ' , $ t_timestamp_gmt );
255- } else if ( !isset ( $ t_commit ['parent ' ] ) && preg_match ( '@^# Parent +([a-f0-9]+)@ ' , $ t_line , $ t_matches ) ) {
256- $ t_parents [] = $ t_matches [1 ];
257- $ t_commit ['parent ' ] = $ t_matches [1 ];
258- }
259- } else if ( isset ( $ t_commit ['revision ' ] ) ) {
260- if ( preg_match ( '@^diff @ ' , $ t_line , $ t_matches ) ) {
261285 break ;
262- }
263- $ t_message [] = $ t_line ;
286+ case 'Node ID ' :
287+ $ t_commit ['revision ' ] = $ t_metadata ['rev ' ];
288+ break ;
289+ case 'Parent ' :
290+ $ t_parents [] = $ t_commit ['parent ' ] = $ t_metadata ['rev ' ];
291+ break ;
264292 }
293+ $ i ++;
265294 }
266295
267- if ( !SourceChangeset::exists ( $ p_repo ->id , $ t_commit ['revision ' ] ) ) {
268- $ t_commit ['message ' ] = implode ( "\n" , $ t_message );
296+ if ( !SourceChangeset::exists ( $ p_repo ->id , $ t_commit ['revision ' ] ) ) {
297+ # Read commit message
298+ $ t_message = '' ;
299+ while ( $ i < count ( $ t_input ) ) {
300+ $ t_match = preg_match (
301+ '/^ ' . self ::PATTERN_DIFF . '/ ' ,
302+ $ t_input [$ i ]
303+ );
304+ if ( $ t_match ) {
305+ break ;
306+ }
307+ $ t_message .= $ t_input [$ i ++] . "\n" ;
308+ }
309+ $ t_commit ['message ' ] = trim ( $ t_message );
269310
270311 $ t_changeset = new SourceChangeset ( $ p_repo ->id , $ t_commit ['revision ' ],
271312 $ p_branch , $ t_commit ['date ' ], $ t_commit ['author ' ],
@@ -275,7 +316,12 @@ private function commit_changeset( $p_repo, $p_input, $p_branch='' ) {
275316
276317 $ t_changeset ->author_email = empty ($ t_commit ['author_email ' ])? '' : $ t_commit ['author_email ' ];
277318
278- preg_match_all ('#diff[\s]*-r[\s]([^\s]*)[\s]*-r[\s]([^\s]*)[\s]([^\n]*)\n(Binary file[\s]([^\r\n\t\f\v]*)[\s]has changed|\-{3}[\s](/dev/null)?[^\t]*[^\n]*\n\+{3}[\s](/dev/null)?[^\t]*\t[^\n]*)#u ' , $ p_input , $ t_matches , PREG_SET_ORDER );
319+ static $ s_pattern_diff = '# '
320+ . self ::PATTERN_DIFF . '\n( '
321+ . self ::PATTERN_BINARY_FILE . '| '
322+ . self ::PATTERN_PLUS_MINUS
323+ . ')#u ' ;
324+ preg_match_all ( $ s_pattern_diff , $ p_input , $ t_matches , PREG_SET_ORDER );
279325
280326 $ t_commit ['files ' ] = array ();
281327
0 commit comments