99Inspired by Sean Patrick O'Brien (@obriensp)'s 2013 "proto-dump": https://github.com/obriensp/proto-dump
1010"""
1111
12+ import sys
1213from pathlib import Path
1314from tqdm import tqdm
15+ from typing import List
16+ from collections import defaultdict
1417
1518from google .protobuf .internal .decoder import _DecodeVarint , SkipField
1619from google .protobuf import descriptor_pb2
1720from google .protobuf .descriptor_pool import DescriptorPool
1821from google .protobuf .message import DecodeError
19- from google .protobuf .internal import api_implementation
22+
23+
# Scalar field-type numbers (as found in ``field.type``) mapped to the keyword
# used for that type in .proto source. Types 11 and 14 are deliberately absent:
# to_proto_file() spells those fields with ``field.type_name`` instead. Any
# other number missing here makes to_proto_file() raise NotImplementedError.
PROTO_TYPES = {
    # Floating point
    1: "double",
    2: "float",
    # Variable-length and fixed-width integers
    3: "int64",
    4: "uint64",
    5: "int32",
    6: "fixed64",
    7: "fixed32",
    # Boolean and length-delimited scalars
    8: "bool",
    9: "string",
    12: "bytes",
    # Remaining integer encodings
    13: "uint32",
    15: "sfixed32",
    16: "sfixed64",
    17: "sint32",
    18: "sint64",
}
41+
def to_proto_file(fds: descriptor_pb2.FileDescriptorSet) -> str:
    """Render a one-file ``FileDescriptorSet`` as proto2 source text.

    The output always declares ``syntax = "proto2"``; enums, messages (with
    their nested enums/messages, fields, oneofs and extension ranges) and
    ``extend`` blocks are emitted in that order.

    Args:
        fds: A descriptor set containing exactly one ``FileDescriptorProto``.

    Returns:
        The reconstructed ``.proto`` file contents, lines joined by newlines.

    Raises:
        NotImplementedError: If ``fds`` does not hold exactly one file, or if a
            field has a label or type this renderer does not know how to spell
            (any type that is neither in ``PROTO_TYPES`` nor message/enum).
    """
    if len(fds.file) != 1:
        raise NotImplementedError("Only one file per fds.")
    f = fds.file[0]

    # Numeric values as defined by descriptor.proto.
    label_keywords = {1: "optional", 2: "required", 3: "repeated"}
    label_optional = 1
    type_string, type_bytes = 9, 12
    type_message, type_enum = 11, 14
    max_field_number = 536870911  # 2**29 - 1

    lines = ['syntax = "proto2";', ""]

    for dependency in f.dependency:
        lines.append(f'import "{dependency}";')

    # A file without a package must not emit "package ;", which is a syntax error.
    if f.package:
        lines.append(f"package {f.package};")
    lines.append("")

    def generate_enum_lines(f, lines: List[str], indent: int = 0):
        """Append every enum declared directly on ``f`` (a file or a message)."""
        prefix = "  " * indent
        for enum in f.enum_type:
            lines.append(prefix + f"enum {enum.name} " + "{")
            for value in enum.value:
                lines.append(prefix + f"  {value.name} = {value.number};")
            lines.append(prefix + "}")

    def format_default(field) -> str:
        """Return ``field.default_value`` spelled as a .proto literal."""
        value = field.default_value
        if field.type == type_string:
            # Descriptors store string defaults unescaped, so quote and escape.
            escaped = (
                value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
            )
            return f'"{escaped}"'
        if field.type == type_bytes:
            # Descriptors store bytes defaults already C-escaped; only quote.
            return f'"{value}"'
        return value

    def generate_field_line(field, in_oneof: bool = False) -> str:
        """Return one field declaration, indented one level, ending in ``;``."""
        parts = []
        if field.label not in label_keywords:
            raise NotImplementedError("Unknown field label type!")
        # Fields inside a oneof carry the "optional" label in the descriptor,
        # but the keyword is not allowed inside a oneof body.
        if not (in_oneof and field.label == label_optional):
            parts.append(label_keywords[field.label])

        if field.type in PROTO_TYPES:
            parts.append(PROTO_TYPES[field.type])
        elif field.type in (type_message, type_enum):
            # Message and enum fields are referred to by their type name.
            parts.append(field.type_name)
        else:
            raise NotImplementedError(f"Unknown field type {field.type}!")

        parts.append(field.name)
        parts.append("=")
        parts.append(str(field.number))

        options = []
        has_text_type = field.type in (type_string, type_bytes)
        # An explicitly set empty default is meaningful only for string/bytes.
        if field.default_value or (has_text_type and field.HasField("default_value")):
            options.append(f"default = {format_default(field)}")
        if field.options.deprecated:
            options.append("deprecated = true")
        if field.options.packed:
            options.append("packed = true")
        # TODO: Protobuf supports other options in square brackets!
        # Add support for them here to make this feature-complete.
        if options:
            parts.append(f"[{', '.join(options)}]")
        return f"  {' '.join(parts)};"

    def generate_extension_lines(message, lines: List[str], indent: int = 0):
        """Append one ``extend`` block per extendee declared on ``message``."""
        prefix = "  " * indent
        extensions_grouped_by_extendee = defaultdict(list)
        for extension in message.extension:
            extensions_grouped_by_extendee[extension.extendee].append(extension)
        for extendee, extensions in extensions_grouped_by_extendee.items():
            lines.append(prefix + f"extend {extendee} {{")
            for extension in extensions:
                lines.append(prefix + generate_field_line(extension))
            lines.append(prefix + "}")

    def generate_message_lines(f, lines: List[str], indent: int = 0):
        """Append every message declared on ``f``, recursing into nested ones."""
        prefix = "  " * indent
        next_prefix = "  " * (indent + 1)

        # Files expose their messages as message_type; messages as nested_type.
        submessages = f.message_type if hasattr(f, "message_type") else f.nested_type

        for message in submessages:
            lines.append(prefix + f"message {message.name} " + "{")

            generate_enum_lines(message, lines, indent + 1)
            generate_message_lines(message, lines, indent + 1)

            # Plain fields first; oneof members are collected in a single pass
            # and emitted inside their oneof afterwards.
            oneof_members = defaultdict(list)
            for field in message.field:
                if field.HasField("oneof_index"):
                    oneof_members[field.oneof_index].append(field)
                else:
                    lines.append(prefix + generate_field_line(field))

            for oneof_index, oneof in enumerate(message.oneof_decl):
                lines.append(next_prefix + f"oneof {oneof.name} {{")
                for field in oneof_members.get(oneof_index, ()):
                    lines.append(next_prefix + generate_field_line(field, in_oneof=True))
                lines.append(next_prefix + "}")

            for extension_range in message.extension_range:
                # The descriptor's end is exclusive while "extensions A to B" is
                # inclusive, hence the -1; clamp to the largest field number.
                start = extension_range.start
                end = min(extension_range.end - 1, max_field_number)
                lines.append(next_prefix + f"extensions {start} to {end};")

            generate_extension_lines(message, lines, indent + 1)
            lines.append(prefix + "}")
            lines.append("")

    generate_enum_lines(f, lines)
    generate_message_lines(f, lines)
    generate_extension_lines(f, lines)

    return "\n".join(lines)
20156
21157
22158class ProtoFile (object ):
@@ -39,19 +175,24 @@ def attempt_to_load(self):
39175 try :
40176 return self .pool .Add (self .file_descriptor_proto )
41177 except Exception as e :
178+ if "duplicate file name" in str (e ):
179+ return self .pool .FindFileByName (e .args [0 ].split ("duplicate file name" )[1 ].strip ())
42180 return None
43181
44182 @property
45183 def descriptor (self ):
46184 return self .attempt_to_load ()
47185
48186 def __repr__ (self ):
49- return " <%s: path=\ " %s\" >" % (self .__class__ .__name__ , self .path )
187+ return ' <%s: path="%s">' % (self .__class__ .__name__ , self .path )
50188
@property
def source(self):
    """Return this file rendered as ``.proto`` source text.

    Returns:
        The text produced by ``to_proto_file()`` for this file's descriptor,
        or ``None`` when the descriptor is not available.
    """
    # Read the property once: every access re-runs attempt_to_load().
    descriptor = self.descriptor
    if not descriptor:
        return None

    # Round-trip through the serialized form to obtain a FileDescriptorProto
    # wrapped in the single-file set that to_proto_file() expects.
    fds = descriptor_pb2.FileDescriptorSet()
    fds.file.add().ParseFromString(descriptor.serialized_pb)
    return to_proto_file(fds)
56197
57198
@@ -77,19 +218,19 @@ def read_until_null_tag(data):
77218
78219
79220def extract_proto_from_file (filename , descriptor_pool ):
80- with open (filename , 'rb' ) as f :
221+ with open (filename , "rb" ) as f :
81222 data = f .read ()
82223 offset = 0
83224
84- PROTO_MARKER = b' .proto'
225+ PROTO_MARKER = b" .proto"
85226
86227 while True :
87228 # Look for ".proto"
88229 suffix_position = data .find (PROTO_MARKER , offset )
89230 if suffix_position == - 1 :
90231 break
91232
92- marker_start = data .rfind (b' \x0A ' , offset , suffix_position )
233+ marker_start = data .rfind (b" \x0A " , offset , suffix_position )
93234 if marker_start == - 1 :
94235 # Doesn't look like a proto descriptor
95236 offset = suffix_position + len (PROTO_MARKER )
@@ -163,16 +304,9 @@ def main():
163304 parser .add_argument ("output_path" , help = "Output directory to dump .protoc files to." )
164305
165306 args = parser .parse_args ()
166-
167- if api_implementation .Type () != "cpp" :
168- raise NotImplementedError (
169- "This script requires the Protobuf installation to use the C++ implementation. Please"
170- " reinstall Protobuf with C++ support."
171- )
172-
173307 GLOBAL_DESCRIPTOR_POOL = DescriptorPool ()
174308
175- all_filenames = [str (path ) for path in Path (args .input_path ).rglob ('*' ) if not path .is_dir ()]
309+ all_filenames = [str (path ) for path in Path (args .input_path ).rglob ("*" ) if not path .is_dir ()]
176310
177311 print (
178312 f"Scanning { len (all_filenames ):,} files under { args .input_path } for protobuf definitions..."
@@ -190,11 +324,16 @@ def main():
190324 if not found .attempt_to_load ():
191325 missing_deps .update (find_missing_dependencies (proto_files_found , found .path ))
192326
327+ for found in proto_files_found :
328+ if not found .attempt_to_load ():
329+ missing_deps .add (found )
330+
193331 if missing_deps :
194332 print (
195333 f"Unable to print out all Protobuf definitions; { len (missing_deps ):,} proto files could"
196334 f" not be found:\n { missing_deps } "
197335 )
336+ sys .exit (1 )
198337 else :
199338 for proto_file in tqdm (proto_files_found ):
200339 Path (args .output_path ).mkdir (parents = True , exist_ok = True )
0 commit comments