diff --git a/CMakeLists.txt b/CMakeLists.txt index 15a82ce..03e0860 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -138,13 +138,15 @@ if(BUILD_TESTING) ament_pep257() ament_xmllint() + find_package(ffmpeg_image_transport_msgs) find_package(ament_cmake_gtest REQUIRED) - ament_add_gtest(${PROJECT_NAME}_encoder_test test/encoder_test.cpp + ament_add_gtest(${PROJECT_NAME}_test test/encoder_decoder_test.cpp test/encoder_decoder_tester.cpp WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test) - target_include_directories(${PROJECT_NAME}_encoder_test PUBLIC + target_include_directories(${PROJECT_NAME}_test PUBLIC $ $) - target_link_libraries(${PROJECT_NAME}_encoder_test ${PROJECT_NAME}) + target_link_libraries(${PROJECT_NAME}_test + ${ffmpeg_image_transport_msgs_TARGETS} ${PROJECT_NAME}) endif() ament_export_include_directories(include) diff --git a/README.md b/README.md index 36d6b28..fa00baf 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ This ROS2 package supports encoding/decoding with the FFMpeg library, for example encoding h264 and h265 or HEVC, using Nvidia or other hardware acceleration when available. This package is meant to be used by image transport plugins like -the [ffmpeg image transport](https://github.com/ros-misc-utilities/ffmpeg_image_transport/). +the [ffmpeg image transport](https://github.com/ros-misc-utilities/ffmpeg_image_transport/) +and the [foxglove compressed video transport](https://github.com/ros-misc-utilities/foxglove_compressed_video_transport/). ## Supported systems @@ -12,6 +13,7 @@ Continuous integration is tested under Ubuntu with the following ROS2 distros: [![Build Status](https://build.ros2.org/buildStatus/icon?job=Hdev__ffmpeg_encoder_decoder__ubuntu_jammy_amd64&subject=Humble)](https://build.ros2.org/job/Hdev__ffmpeg_encoder_decoder__ubuntu_jammy_amd64/) [![Build Status](https://build.ros2.org/buildStatus/icon?job=Jdev__ffmpeg_encoder_decoder__ubuntu_noble_amd64&subject=Jazzy)](https://build.ros2.org/job/Jdev__ffmpeg_encoder_decoder__ubuntu_noble_amd64/) + [![Build Status](https://build.ros2.org/buildStatus/icon?job=Kdev__ffmpeg_encoder_decoder__ubuntu_noble_amd64&subject=Kilted)](https://build.ros2.org/job/Kdev__ffmpeg_encoder_decoder__ubuntu_noble_amd64/) [![Build Status](https://build.ros2.org/buildStatus/icon?job=Rdev__ffmpeg_encoder_decoder__ubuntu_noble_amd64&subject=Rolling)](https://build.ros2.org/job/Rdev__ffmpeg_encoder_decoder__ubuntu_noble_amd64/) @@ -34,11 +36,90 @@ and follow the [instructions here](https://github.com/ros-misc-utilities/.github Make sure to source your workspace's ``install/setup.bash`` afterwards. -## Parameters - -This package has no parameters. It is the upper layer's responsibility to e.g. manage the mapping between encoder and decoder, i.e. to tell the decoder class which libav decoder should be used for the decoding, or to set the encoding parameters. - -### How to use a custom version of libav (aka ffmpeg) +## API overview + +### Preliminaries +When using libav it is important to understand the difference between the encoder and the codec. +The *codec* is the standardized format in which images are encoded, for instance ``h264`` or ``hevc``. +The *encoder* is a libav software module that can encode images for a given codec. For instance ``libx264``, ``libx264rgb``, + `h264_nvenc``, and ``h264_vaapi`` are all *encoders* that encode for the *codec* h264. +Some of the encoders are hardware accelerated, some can handle more image formats than others, but in the end they all encode video for a specific codec. + +For the many AV options available for various libav encoders, and for ``qmax``, ``bit_rate`` and similar settings please refer to the ffmpeg documentation. + +### The inner workings +![overview_diagram](./doc/encoder_decoder.svg) + +The diagram above shows the stations that a ROS Image message passes as it traverses the encoder and decoder. +1) The ROS image (with [ROS sensor\_msgs/Image encoding](https://github.com/ros2/common_interfaces/blob/a2ef867438e6d4eed074d6e3668ae45187e7de86/sensor_msgs/include/sensor_msgs/image_encodings.hpp)) is first converted with the ROS [cv\_bridge](https://github.com/ros-perception/vision_opencv) into the ``cv_bridge_target_format``. + This conversion is necessary because some ROS encodings (like bayer images) are not supported by libswscale. + The ``cv_bridge_target_format`` can be set via ``setCVBridgeTargetFormat(const std::string & fmt)``. + If this format is not set explicitly the image will be converted to the default format of ``bgr8``. + This may not be what you want for e.g. ``mono8`` (gray) or Bayer images. + Ideally the``cv_bridge_target_format`` can be directly used by the libav decoder so the next step becomes a no-op. + But at the very least ``cv_bridge_target_format`` must be an acceptable libswscale input format (with the exception of + special hacks for encoding single-channel images, see below). +2) The image is then converted to ``av_source_pixel_format`` using libswscale. + The ``av_source_pixel_format`` can be set with ``setAVSourcePixelFormat()``, defaulting to something that is acceptable to the libav encoder. + You can use ffmpeg (``ffmpeg -h encoder=libx264 | grep formats``) to list all formats that an encoder supports. + Note that the ffmpeg/libav format string notation is different from the ROS encoding strings, and the ``av_source_pixel_format`` is specified using the libav convention, whereas the ``cv_bridge_target_format`` uses ROS convention! + (If you choose to bypass the cv\_bridge conversion from step 1 by feeding the images to the encoder directly via the ``encodeImage(const cv::Mat & img ...)`` method, you must still set the ``cv_bridge_target_format`` such that the encoder knows what format the ``img`` argument has.) + When aiming for lossless compression, beware of any ``av_source_pixel_format`` that reduces the color resolution, such as ``yuv420p``, ``nv12`` etc. + For Bayer images, use the special hack for single-channel images. + +3) The libav encoder encodes the packet with its supported codec, e.g. the ``libx264`` will produce ``h264`` packets. + The ``encoding`` field of the FFMPEGPacket message will document all image format conversions and the codec, in reverse order, separated by semicolon. + This way the decoder can attempt to reproduce the original ``ros_encoding``. + +4) The libav decoder decodes the packet into the original ``av_source_pixel_format``. + +5) Finally the image is converted to ``output_message_format`` using libswscale. + This format can be set (in ROS encoding syntax!) with ``setOutputMessageEncoding()``. + The format must be supported by both ROS and libswscale (except when using the special hack for single-channel images). + +Note that only very few combinations of libav encoders, ``cv_bridge_target_format`` and ``av_source_pixel_format`` have been tested. Please provide feedback if you observe crashes or find obvious bugs. PRs are always appreciated! + +### The special single-channel hack + +Many libav encoders do not support single-channel formats (like mono8 or bayer). +For this reason a special hack is implemented in the encoder that adds an empty (zero-value) color channel to the single-channel image. +Later, the decoder removes it again. +To utilitze this hack, specify a ``cv_bridge_target_format`` of e.g. ``bayer_rggb8``. Without the special hack, this would trigger an error because Bayer formats are not acceptable to libswscale. +Instead, the image is converted to ``yuv420p`` or ``nv12`` by adding an empty color channel. +These formats are acceptable to most encoders. +The decoder in turn recognizes that the ``cv_bridge_target_format`` is a single-channel format, but ``yuv420p``/``nv12`` are not, and therefore drops the color channel. +This hack greatly improves the efficiency for lossless encoding of Bayer images because it avoids conversion to full RGB and back. + +## API usage + +### Encoder + +Using the encoder involves the following steps: +- instantiating the ``Encoder`` object. +- setting properties like the libav encoder to use, the encoding formats, and AV options. +- initializing the encoder object. This requires knowledge of the image size and therefore + can only be done when the first image is available. Note that many properties + (encoding formats etc) must have been set *before* initializing the encoder. +- feeding images to the encoder (and handling the callbacks when encoded packets become available) +- flushing the encoder (may result in additional callbacks with encoded packets) +- destroying the ``Encoder`` object. + +The ``Encoder`` class API description has a short example code snippet. + +### Decoder + +Using the decoder involves the following steps: +- instantiating the ``Decoder`` object. +- if so desired, setting the ROS output (image encoding) format. +- initializing the decoder object. For this you need to know the encoding (codec, e.g. "h264"), + and you have to specify the libav decoder name (e.g. "h264_cuvid"). +- feeding encoded packets to the decoder (and handling the callbacks + when decoded images become available) +- flushing the decoder (may result in additional callbacks with decoded images) +- destroying the ``Decoder`` object. +The ``Decoder`` class description has a short example code snippet. + +## How to use a custom version of libav (aka ffmpeg) Compile *and install* ffmpeg. Let's say the install directory is ``/home/foo/ffmpeg/build``, then for it to be found while building, @@ -69,16 +150,8 @@ depth=1 &&cd jetson-ffmpeg && ./ffpatch.sh ../ffmpeg && cd ../ffmpeg && ./config Then follow the section above on how to actually use that custom ffmpeg library. As always first test on the -CLI that the newly compiled ``ffmpeg`` command now supports -``h264_nvmpi``. The transport can then be configured to use -nvmpi like so: +CLI that the newly compiled ``ffmpeg`` command now supports ``h264_nvmpi``. -``` - parameters=[{'ffmpeg_image_transport.encoding': 'h264_nvmpi', - 'ffmpeg_image_transport.profile': 'main', - 'ffmpeg_image_transport.preset': 'll', - 'ffmpeg_image_transport.gop_size': 15}] -``` ## License This software is issued under the Apache License Version 2.0. diff --git a/doc/encoder_decoder.dia b/doc/encoder_decoder.dia new file mode 100644 index 0000000..ef8d7b4 --- /dev/null +++ b/doc/encoder_decoder.dia @@ -0,0 +1,1256 @@ + + + + + + + + + + + + + #Letter# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #ros_encoding# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ## + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #cv_bridge_target_format# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #av_source_pixeL_format# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #libav encoder# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #FFMPEGPacket + + +# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #libav decoder# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #av_source_pixeL_format# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #output_message_format# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #libswscale# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #Image# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #libswscale# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #cv_bridge# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #Image# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #Encoder# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #Decoder# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #codec; +av_source_pixel_format; +cv_bridge_target_format; +ros_encoding# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/encoder_decoder.svg b/doc/encoder_decoder.svg new file mode 100644 index 0000000..f96a421 --- /dev/null +++ b/doc/encoder_decoder.svg @@ -0,0 +1,155 @@ + + + + + + + + + + + + + ros_encoding + + + + + + + + + cv_bridge_target_format + + + + + + av_source_pixeL_format + + + + + + libav encoder + + + + + + + + + + + + + + FFMPEGPacket + + + + + + + + + libav decoder + + + + + + av_source_pixeL_format + + + + + + output_message_format + + + + + + libswscale + + + + + + + + + + + + + + + + + + Image + + + + + + libswscale + + + + + + + + + + + + + + cv_bridge + + + + + + + + + + + + + + + + + + + + + + Image + + + + Encoder + + + Decoder + + + codec; + av_source_pixel_format; + cv_bridge_target_format; + ros_encoding + + + + + + + diff --git a/include/ffmpeg_encoder_decoder/decoder.hpp b/include/ffmpeg_encoder_decoder/decoder.hpp index 0e2521c..8292a2a 100644 --- a/include/ffmpeg_encoder_decoder/decoder.hpp +++ b/include/ffmpeg_encoder_decoder/decoder.hpp @@ -16,12 +16,15 @@ #ifndef FFMPEG_ENCODER_DECODER__DECODER_HPP_ #define FFMPEG_ENCODER_DECODER__DECODER_HPP_ +#include #include #include #include #include +#include #include #include +#include #include extern "C" { @@ -36,51 +39,231 @@ extern "C" { namespace ffmpeg_encoder_decoder { +/** + * \brief Decodes ffmpeg encoded messages via libav (ffmpeg). + * + * The Decoder class facilitates decoding of messages that have been encoded with the + * Encoder class by leveraging libav, the collection of libraries used by ffmpeg. + * Sample code: + ``` +void imageCallback(const sensor_msgs::msg::Image::ConstSharedPtr & img, bool isKeyFrame, + const std::string &avPixFmt) +{ + // process decoded image here... +} + +ffmpeg_encoder_decoder::Decoder decoder; +ffmpeg_image_transport_msgs::msg::FFMPEGPacket msg; +msg.header.frame_id = "frame_id"; +msg.width = 640; +msg.height = 480; +msg.step = 640 * 3; +msg.encoding = "hevc"; +msg.data.resize(msg.height * msg.step, 0); // not valid data! + +if (!decoder.isInitialized()) { + decoder.initialize(msg.encoding, imageCallback, "hevc_cuvid"); +} + +for (int64_t i = 0; i < 10; i++) { + msg.header.stamp = rclcpp::Time(i, RCL_SYSTEM_TIME); + if (!decoder.decodePacket( + msg.encoding, &msg.data[0], msg.data.size(), msg.pts, msg.header.frame_id, + msg.header.stamp)) { + throw(std::runtime_error("error decoding packet!")); + } +} +decoder.flush(); +``` + */ class Decoder { public: - using Callback = std::function; - using PTSMap = std::unordered_map; + /** + * \brief callback function signature + * \param img pointer to decoded image + * \param isKeyFrame true if the decoded image is a keyframe + * \param avPixFormat the original libav format of the encoded picture + */ + using Callback = std::function; + /** + * \brief Constructor. + */ Decoder(); + + /** + * \brief Destructor. + */ ~Decoder(); - bool isInitialized() const { return (codecContext_ != NULL); } - // Initialize decoder upon first packet received, - // providing callback to be called when frame is complete. - // You must still call decodePacket(msg) afterward! - bool initialize(const std::string & encoding, Callback callback, const std::string & dec); - // clears all state, but leaves config intact + + /** + * Test if decoder is initialized. + * \return true if the decoder is initialized. + */ + bool isInitialized() const + { + Lock lock(mutex_); + return (codecContext_ != NULL); + } + + /** + * \brief Initializes the decoder for a given codec and libav decoder. + * + * Initializes the decoder, with multiple decoders to pick from. + * \param codec the codec (encoding) from the first packet. Can never change! + * \param callback the function to call when frame has been decoded. + * \param decoder the name of the libav decoder to use. + * \return true if initialized successfully. + */ + bool initialize(const std::string & codec, Callback callback, const std::string & decoder); + + /** + * \brief Sets the ROS output message encoding format. + * + * Sets the ROS output message encoding format. Must be compatible with one of the + * libav encoding formats, or else exception will be thrown. If not set, + * the output encoding will default to bgr8. + * \param output_encoding defaults to bgr8 + * \throw std::runtime_error() if no matching libav pixel format could be found + */ + void setOutputMessageEncoding(const std::string & output_encoding); + + /** + * \brief Clears all decoder state except for timers, loggers, and other settings. + */ void reset(); - // decode packet (may result in frame callback!) + + /** + * \brief Decodes encoded packet. + * + * Decodes packet. Decoder must have been initialized beforehand. Calling this + * function may result in callback with decoded frame. + * \param encoding the name of the encoding/codec (typically from msg encoding) + * \param data pointer to packet data + * \param size size of packet data + * \param pts presentation time stamp of data packet + * \param frame_id ros frame id (from message header) + * \param stamp ros message header time stamp + * \return true if decoding was successful + */ bool decodePacket( const std::string & encoding, const uint8_t * data, size_t size, uint64_t pts, const std::string & frame_id, const rclcpp::Time & stamp); - void setMeasurePerformance(bool p) { measurePerformance_ = p; } + /** + * \brief Flush decoder. + * + * This method can only be called once at the end of the decoding stream. + * It will force any buffered packets to be delivered as frames. No further + * packets can be fed to the decoder after calling flush(). + * \return true if flush was successful (libav returns EOF) + */ + bool flush(); + + /** + * \brief Overrides the default ("Decoder") logger. + * \param logger the logger to override the default ("Decoder") with. + */ + void setLogger(rclcpp::Logger logger) + { + Lock lock(mutex_); + logger_ = logger; + } + + /** + * \brief Enables or disables performance measurements. Poorly tested, may be broken. + * \param p set to true to enable performance debugging. + */ + void setMeasurePerformance(bool p) + { + Lock lock(mutex_); + measurePerformance_ = p; + } + + /** + * \brief Prints performance timers. Poorly tested, may be broken. + * \param prefix for labeling the printout + */ void printTimers(const std::string & prefix) const; + + /** + * \brief resets performance debugging timers. Poorly tested, may be broken. + */ void resetTimers(); - void setLogger(rclcpp::Logger logger) { logger_ = logger; } - static const std::unordered_map & getDefaultEncoderToDecoderMap(); + + /** + * \brief adds AVOption setting to list of options to be applied before opening the encoder + * \param key name of AVOption to set, e.g. "preset" + * \param value value of AVOption e.g. "slow" + */ + void addAVOption(const std::string & key, const std::string & value) + { + Lock lock(mutex_); + avOptions_.push_back({key, value}); + } + + /** + * \brief Finds all hardware and software decoders for a given codec. + * + * Finds the name of all hardware and software decoders that match + * a certain codec (or encoder). + * \param codec name of the codec, i.e. h264, hevc etc + * \param hw_decoders non-null pointer for returning list of hardware decoders + * \param sw_decoders non-null pointer for returning list of software decoders + */ + static void findDecoders( + const std::string & codec, std::vector * hw_decoders, + std::vector * sw_decoders); + + /** + * \brief Finds all decoders that can decode a given codec. + * + * Finds the name of all hardware and software decoders (combined) + * that match a certain codec (or encoder). + * \param codec name of the codec, i.e. h264, hevc etc + * \return string with comma-separated list of libav decoders + */ + static std::string findDecoders(const std::string & codec); + + // ------------------- deprecated functions --------------- + /** + * \deprecated Use findDecoders(codec) instead. + */ + [[deprecated( + "use findDecoders(codec) now.")]] static const std::unordered_map & + getDefaultEncoderToDecoderMap(); private: - rclcpp::Logger logger_; - bool initDecoder(const std::string & encoding, const std::string & decoder); + using Lock = std::unique_lock; + bool doInitDecoder(const std::string & encoding, const std::string & decoder); + bool initDecoder(const std::string & encoding, const std::string & decoders); + int receiveFrame(); + int convertFrameToMessage(const AVFrame * frame, const ImagePtr & image); + void setAVOption(const std::string & field, const std::string & value); + void setEncoding(const std::string & encoding); + void resetNoLock(); // --------------- variables + rclcpp::Logger logger_; Callback callback_; - PTSMap ptsToStamp_; // mapping of header - + PTSMap ptsToStamp_; + std::vector> avOptions_; + mutable std::mutex mutex_; // --- performance analysis bool measurePerformance_{false}; TDiff tdiffTotal_; - // --- libav stuff + // --- libav related variables AVRational timeBase_{1, 100}; - std::string encoding_; + std::string packetEncoding_; + std::string origEncoding_; AVCodecContext * codecContext_{NULL}; - AVFrame * decodedFrame_{NULL}; + AVFrame * swFrame_{NULL}; AVFrame * cpuFrame_{NULL}; - AVFrame * colorFrame_{NULL}; + AVFrame * outputFrame_{NULL}; SwsContext * swsContext_{NULL}; - enum AVPixelFormat hwPixFormat_; + enum AVPixelFormat hwPixFormat_ { AV_PIX_FMT_NONE }; + std::string outputMsgEncoding_; AVPacket packet_; AVBufferRef * hwDeviceContext_{NULL}; }; diff --git a/include/ffmpeg_encoder_decoder/encoder.hpp b/include/ffmpeg_encoder_decoder/encoder.hpp index a93fb41..f9e4d3d 100644 --- a/include/ffmpeg_encoder_decoder/encoder.hpp +++ b/include/ffmpeg_encoder_decoder/encoder.hpp @@ -16,6 +16,7 @@ #ifndef FFMPEG_ENCODER_DECODER__ENCODER_HPP_ #define FFMPEG_ENCODER_DECODER__ENCODER_HPP_ +#include #include #include #include @@ -39,72 +40,200 @@ extern "C" { namespace ffmpeg_encoder_decoder { +/** + * \brief Encodes ROS image messages via libav (ffmpeg). + * + * The Encoder class facilitates encoding of ROS images by leveraging libav, + * the collection of libraries used by ffmpeg. + * Sample code: + ``` + // handle the encoded packet in this function + void packetReady( + const std::string & frame_id, const rclcpp::Time & stamp, const std::string & codec, + uint32_t width, uint32_t height, uint64_t pts, uint8_t flags, uint8_t * data, size_t sz) { + // do something useful here ... + } + + ffmpeg_encoder_decoder::Encoder enc; + enc.setEncoder("libx265"); // set libav encoder + // note: will not be lossless unless we happen to have right pixel format + enc.setAVOption("x265-params", "lossless=1"); + enc.setAVOption("crf", "0"); + + sensor_msgs::msg::Image image; + image.encoding = "bgr8"; + image.width = image_width; + image.height = image_height; + image.step = image.width * 3; // 3 bytes per pixel + image.header.frame_id = "frame_id"; + image.is_bigendian = false; + + if (!enc.initialize(image.width, image.height, callback_fn, image.encoding)) { + std::cerr << "failed to initialize encoder!" << std::endl; + exit (-1); + } + + for (int64_t i = 0; i < numFrames; i++) { + image.header.stamp = rclcpp::Time(i + 1, RCL_SYSTEM_TIME); + image.data.resize(image.step * image.height, static_cast(i)); + enc.encodeImage(image); // may produce callback + } + enc.flush(); // may produce callback +} + ``` + */ class Encoder { public: - using Lock = std::unique_lock; + /** + * \brief defines callback function signature. + * + * The encoder will call this function when an encoded packet is ready. + * \param frame_id the frame_id of the message that produced the packet + * \param stamp the ROS time stamp of the message that produced the packet + * \param codec the codec used for compression: h264, hevc + * \param width image width + * \param height image height + * \param pts libav presentation time stamp (pts) + * \param data pointer to encoded packet + * \param sz size of encoded packet + */ using Callback = std::function; + /** + * \brief Constructor. Doesn't open codec or do anything else dangerous. + */ Encoder(); + /** + * \brief Destructor. Clears all state and frees all memory. + */ ~Encoder(); - // ------- various encoding settings - void setEncoder(const std::string & n) - { - Lock lock(mutex_); - encoder_ = n; - } - void setProfile(const std::string & p) + /** + * \brief sets the name of libav encoder to be used + * \param encoder name of the libav encoder, e.g. libx264, hevc_nvenc etc + */ + void setEncoder(const std::string & encoder_name); + /** + * \brief adds AVOption setting to list of options to be applied before opening the encoder + * \param key name of AVOption to set, e.g. "preset" + * \param value value of AVOption e.g. "slow" + */ + void addAVOption(const std::string & key, const std::string & value) { Lock lock(mutex_); - profile_ = p; + avOptions_.push_back({key, value}); } - void setPreset(const std::string & p) + + /** + * This is the format of the image that will be fed into the encoder. + * In the README, this is referred to as av_source_pixel_format. + * + * If your ROS message format (cv_bridge_target_format) is different, + * the image will first be converted to the format you set here. + * Must be set before calling initialize(), or else the encoder + * will try to pick some suitable format. See README for more + * details about formats. + * + * \param fmt The pixel format in libav naming convention, e.g. "yuv420p" + */ + void setAVSourcePixelFormat(const std::string & fmt) { Lock lock(mutex_); - preset_ = p; + pixFormat_ = pixelFormat(fmt); } - void setTune(const std::string & p) + /** + * \brief Set cv_bridge_target_format for first image conversion + * + * Sets the target format of the first image conversion done via + * cv_bridge (cv_bridge_target_format). See explanation in the + * README file. Must be set before calling initialize(), or else + * it will default to "bgr8". The cv_bridge_target_format must + * be compatible with some AV_PIX_FMT* type, or else an exception + * will be thrown when calling initialize(). + * + * \param fmt cv_bridge_target_format (see ROS image_encodings.hpp header) + */ + void setCVBridgeTargetFormat(const std::string & fmt) { Lock lock(mutex_); - tune_ = p; + cvBridgeTargetFormat_ = fmt; } - void setPixelFormat(const std::string & p) + + /** + * \brief Sets q_max parameter. See ffmpeg docs for explanation. + * + * \param q q_max parameter + */ + void setQMax(int q) { Lock lock(mutex_); - pixFormat_ = pixelFormat(p); + qmax_ = q; } - void setDelay(const std::string & p) + /** + * \brief Sets bitrate parameter. See ffmpeg docs for explanation. + * \param bitrate bitrate parameter (in bits/s) + */ + void setBitRate(int bitrate) { Lock lock(mutex_); - delay_ = p; + bitRate_ = bitrate; } - void setCRF(const std::string & c) + /** + * \brief gets current bitrate parameter. + * \return the bitrate in bits/sec + */ + int getBitRate() const { Lock lock(mutex_); - crf_ = c; + return (bitRate_); } - void setQMax(int q) + + /** + * \brief Sets gop size (max distance between keyframes). See ffmpeg docs. + * \param gop_size max distance between keyframes + */ + void setGOPSize(int g) { Lock lock(mutex_); - qmax_ = q; + GOPSize_ = g; } - void setBitRate(int r) + /** + * \brief gets current gop size. + * \return the gop_size (max distance between keyframes) + */ + int getGOPSize() const { Lock lock(mutex_); - bitRate_ = r; + return (GOPSize_); } - int getGOPSize() const + /** + * \brief sets maximum number of b-frames. See ffmpeg docs. + * \param b max number of b-frames. + */ + void setMaxBFrames(int b) { Lock lock(mutex_); - return (GOPSize_); + maxBFrames_ = b; } - void setGOPSize(int g) + /** + * \brief gets maximum number of b-frames. See ffmpeg docs. + * \return max number of b-frames. + */ + int getMaxBFrames() const { Lock lock(mutex_); - GOPSize_ = g; + return (maxBFrames_); } + /** + * \brief Sets encoding frame rate as a rational number. + * + * Not sure what it really does. + * + * \param frames the number of frames during the interval + * \param second the time interval (integer seconds) + */ void setFrameRate(int frames, int second) { Lock lock(mutex_); @@ -113,57 +242,149 @@ class Encoder timeBase_.num = second; timeBase_.den = frames; } + /** + * \brief enables or disables performance measurements. Poorly tested, hardly used. + * + * \param p true enables performance statistics + */ void setMeasurePerformance(bool p) { Lock lock(mutex_); measurePerformance_ = p; } - // ------- teardown and startup + // ------- related to teardown and startup + /** + * \brief test if encoder is initialized + * \return true if encoder is initialized + */ bool isInitialized() const { Lock lock(mutex_); return (codecContext_ != NULL); } - bool initialize(int width, int height, Callback callback); + /** + * \brief initializes encoder. + * \param width image width + * \param height image height + * \param callback the function to call for handling encoded packets + * \param encoding the ros encoding string, e.g. bayer_rggb8, rgb8 ... + */ + bool initialize( + int width, int height, Callback callback, const std::string & encoding = std::string()); + /** + * \brief sets ROS logger to use for info/error messages + * \param logger the logger to use for messages + */ void setLogger(rclcpp::Logger logger) { logger_ = logger; } + /** + * \brief completely resets the state of the encoder. + */ void reset(); - // encode image - void encodeImage(const cv::Mat & img, const Header & header, const rclcpp::Time & t0); void encodeImage(const Image & msg); - // flush all packets. Need header to generate callback message - void flush(const Header & header); + /** + * \brief flush all packets (produces callbacks). + */ + void flush(); + /** + * \brief finds the codec for a given encoder, i.e. returns h264 for h264_vaapi + * \param encoder name of libav encoder + * \return codec (libav naming convention) + */ + static std::string findCodec(const std::string & encoder); + // ------- performance statistics void printTimers(const std::string & prefix) const; void resetTimers(); + // ------- deprecated functions + /** + * \deprecated use addAVOption("profile", "value") now. + */ + [[deprecated("use addAVOption() instead.")]] void setProfile(const std::string & p) + { + addAVOption("profile", p); + } + /** + * \deprecated use addAVOption("preset", "value") now. + */ + [[deprecated("use addAVOption() instead.")]] void setPreset(const std::string & p) + { + addAVOption("preset", p); + } + /** + * \deprecated use addAVOption("tune", "value") now. + */ + [[deprecated("use addAVOption() instead.")]] void setTune(const std::string & p) + { + addAVOption("tune", p); + } + /** + * \deprecated use addAVOption("delay", "value") now. + */ + [[deprecated("use addAVOption() instead.")]] void setDelay(const std::string & p) + { + addAVOption("delay", p); + } + /** + * \deprecated use addAVOption("crf", "value") now. + */ + [[deprecated("use addAVOption() instead.")]] void setCRF(const std::string & c) + { + addAVOption("crf", c); + } + /** + * \deprecated Use setAVSourcePixelFormat() + */ + [[deprecated("use setAVSourcePixelFormat(fmt) instead.")]] void setPixelFormat( + const std::string & fmt) + { + setAVSourcePixelFormat(fmt); + } + /** + * \deprecated use encodeImage(const Image &msg) instead. + */ + [[deprecated("use encodeImage(const Image &msg) instead.")]] void encodeImage( + const cv::Mat & img, const Header & header, const rclcpp::Time & t0); + /** + * flush all packets (produces callbacks). + * \deprecated Only header.frame_id is used. Used flush(frame_id) now. + */ + [[deprecated("use flush() instead.")]] void flush(const Header & header); private: - using PTSMap = std::unordered_map; + using Lock = std::unique_lock; - bool openCodec(int width, int height); - void doOpenCodec(int width, int height); + bool openCodec(int width, int height, const std::string & encoding); + void doOpenCodec(int width, int height, const std::string & encoding); void closeCodec(); - int drainPacket(const Header & hdr, int width, int height); + void doEncodeImage(const cv::Mat & img, const Header & header, const rclcpp::Time & t0); + int drainPacket(int width, int height); AVPixelFormat pixelFormat(const std::string & f) const; - void openVAAPIDevice(const AVCodec * codec, int width, int height); + void openHardwareDevice( + const AVCodec * codec, enum AVHWDeviceType hwDevType, int width, int height); void setAVOption(const std::string & field, const std::string & value); + enum AVPixelFormat findMatchingSourceFormat( + const std::string & rosSrcFormat, enum AVPixelFormat targetFormat); + // --------- variables rclcpp::Logger logger_; mutable std::recursive_mutex mutex_; Callback callback_; // config - std::string encoder_; // e.g. "libx264" - std::string preset_; // e.g. "slow", "medium", "lossless" - std::string profile_; // e.g. "main", "high", "rext" - std::string tune_; // e.g. "tune" - std::string delay_; // default is 4 frames for parallel processing. 0 is lowest latency - std::string crf_; // constant rate factor. 0 is lossless, 51 is worst quality - int qmax_{0}; // max allowed quantization. The lower the better quality - int GOPSize_{15}; // distance between two keyframes + std::string encoder_; // e.g. "libx264" + std::string codec_; // e.g. "h264" + std::string encoding_; // e.g. "h264/rgb8" + int qmax_{-1}; // max allowed quantization. The lower the better quality + int GOPSize_{-1}; // distance between two keyframes + int maxBFrames_{-1}; // maximum number of b-frames + int64_t bitRate_{0}; // max rate in bits/s + std::vector> avOptions_; + AVPixelFormat pixFormat_{AV_PIX_FMT_NONE}; + std::string cvBridgeTargetFormat_ = "bgr8"; AVRational timeBase_{1, 100}; AVRational frameRate_{100, 1}; - int64_t bitRate_{1000000}; bool usesHardwareFrames_{false}; + std::string avSourcePixelFormat_; // ------ libav state AVCodecContext * codecContext_{nullptr}; AVBufferRef * hwDeviceContext_{nullptr}; @@ -176,7 +397,7 @@ class Encoder // ---------- other stuff int64_t pts_{0}; PTSMap ptsToStamp_; - // performance analysis + // ---------- performance analysis bool measurePerformance_{true}; int64_t totalInBytes_{0}; int64_t totalOutBytes_{0}; diff --git a/include/ffmpeg_encoder_decoder/pts_map.hpp b/include/ffmpeg_encoder_decoder/pts_map.hpp new file mode 100644 index 0000000..9d5a18e --- /dev/null +++ b/include/ffmpeg_encoder_decoder/pts_map.hpp @@ -0,0 +1,35 @@ +// -*-c++-*--------------------------------------------------------------------------------------- +// Copyright 2025 Bernd Pfrommer +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef FFMPEG_ENCODER_DECODER__PTS_MAP_HPP_ +#define FFMPEG_ENCODER_DECODER__PTS_MAP_HPP_ + +#include +#include +#include +#include + +namespace ffmpeg_encoder_decoder +{ +struct PTSMapEntry +{ + PTSMapEntry(const rclcpp::Time & t, const std::string & f) : time(t), frame_id(f) {} + rclcpp::Time time; + std::string frame_id; +}; + +using PTSMap = std::unordered_map; +} // namespace ffmpeg_encoder_decoder +#endif // FFMPEG_ENCODER_DECODER__PTS_MAP_HPP_ diff --git a/include/ffmpeg_encoder_decoder/utils.hpp b/include/ffmpeg_encoder_decoder/utils.hpp index 7a7da4e..9dce509 100644 --- a/include/ffmpeg_encoder_decoder/utils.hpp +++ b/include/ffmpeg_encoder_decoder/utils.hpp @@ -16,6 +16,7 @@ #ifndef FFMPEG_ENCODER_DECODER__UTILS_HPP_ #define FFMPEG_ENCODER_DECODER__UTILS_HPP_ +#include #include #include #include @@ -28,42 +29,152 @@ namespace ffmpeg_encoder_decoder { namespace utils { -/**! -* convert av pixel format to clear text string -*/ +/** + * \brief Convert av pixel format to clear text string. + * \return string with name of pixel format + */ std::string pix(const AVPixelFormat & f); -/**! -* convert av error number to string -*/ + +/** + * \brief Long descriptor of av pixel format + * \return string with long name of pixel format + */ +std::string pix_long(AVPixelFormat const & f); + +/** + * \brief Convert av error number to string. + * \param errnum libav error number + * \return clear text string with error message + */ std::string err(int errnum); -/**! -* throws runtime_error() with decoded av error string -*/ + +/** + * \brief throws runtime_error() with decoded av error string + * \param msg message (free form text) + * \param errnum libav error number + */ void throw_err(const std::string & msg, int errnum); -/**! -* checks for error and throws runtime_error() with av error string -*/ + +/** + * \brief checks for error and throws runtime_error() with av error string + * \param msg error message (free form) + * \param errnum libav error number + */ void check_for_err(const std::string & msg, int errnum); -/**! -* finds hardware configuration, in particular the target pixel format -* and whether the encoder uses hardware frame upload -*/ + +/** + * \brief finds hardware configuration. + * + * Finds hardware configuration, in particular the target pixel format + * and whether the encoder uses hardware frame upload. + * \param usesHWFrames will be set to true if hw frames are used + * \param hwDevType the hardware device type to probe get the config for + * \param codec the codec to get the config for + * \return the hardware pixel format to use, or AV_PIX_FMT_NONE + */ enum AVPixelFormat find_hw_config( bool * usesHWFrames, enum AVHWDeviceType hwDevType, const AVCodec * codec); -std::vector get_hwframe_transfer_formats(AVBufferRef * hwframe_ctx); -/**! -* finds formats that the encoder supports. Note that for VAAPI, this will just -* return AV_PIX_FMT_VAAPI since it uses hardware frames. -*/ +/** + * \brief Gets all pixel formats that can be transferred to the hardware device + * \param hwframe_ctx the hardware frame context for which transfer is intended + * \return vector of allowed pixel formats + */ +std::vector get_hwframe_transfer_formats( + AVBufferRef * hwframe_ctx, enum AVHWFrameTransferDirection direction); + +/** + * \brief finds all formats that the encoder supports. + * + * Note that for VAAPI, this will justreturn AV_PIX_FMT_VAAPI since it uses hardware frames. + * \param @return vector with pixel formats supported by codec in this context + */ std::vector get_encoder_formats( - AVCodecContext * context, const AVCodec * avctx); -/**! -* picks from a vector of formats the "best" pixel format for a given encoder -*/ + const AVCodecContext * context, const AVCodec * avctx); + +/** + * \brief gets the preferred pixel format from list. + * \param useHWFormat if true only return hardware accelerated formats + * \param fmts vector of formats to choose from + * \return preferred pixel format + */ enum AVPixelFormat get_preferred_pixel_format( - const std::string & encoder, const std::vector & fmts); + bool useHWFormat, const std::vector & fmts); + +/** + * \brief finds the names of all available decoders + * for a given codec (or encoder) + * \param codec the codec / encoding to find decoders for + * \param hw_decoders (output) non-null ptr to hardware decoders + * \param sw_decoders (output) non-null ptr to software decoders + */ +void find_decoders( + const std::string & codec, std::vector * hw_decoders, + std::vector * sw_decoders); + +/** + * \brief finds the names of all available decoders + * for a given codec (or encoder) + * \param codec the codec / encoding to find decoders for + * \return string with comma separated list of libav decoder names + */ + +std::string find_decoders(const std::string & codec); +/** + * \brief filters a string with comma-separated decoders and + * + * \param codec the codec / encoding to filter for + * \param decoders string with comma-separated list of decoder names + * \return string with comma separated list of libav decoder names + */ +std::string filter_decoders(const std::string & codec, const std::string & decoders); + +/** + * \brief gets list of names of all libav supported device types + * + * This is not the list of devices present on the host machine, just + * the ones that libav supports, whether they are present or not. + * \return list of all libav supported device types + */ +std::vector get_hwdevice_types(); + +/** + * \brief find codec for a given encoder + * \param encoder name of libav encoder + * \return name of codec + */ +std::string find_codec(const std::string & encoder); + +/** + * \brief gets hardware device type for specific codec + * \param codec pointer to libav codec + * \param return associated hardware device type + */ +enum AVHWDeviceType find_hw_device_type(const AVCodec * codec); + +/** + * \brief finds AVPixelFormat corresponding to ROS encoding + * \param ros_pix_fmt ros encoding name, e.g. "bgr8" + * \return corresponding AV pixel format + * \throws std::runtime_error exception when no match found. + */ +enum AVPixelFormat ros_to_av_pix_format(const std::string & ros_pix_fmt); + +/** + * \brief splits string by character + * \param str_list character-separated list of tokens + * \param sep separator character + * \return vector of separated strings + */ +std::vector split_by_char(const std::string & str_list, const char sep); +/** + * \brief determine if given ROS single-channel encoding fits into libav color format + * \param encoding ROS message encoding, e.g. bayer_rggb8 + * \param fmt libav color format to test for + * \return true if ROS encoding is single channel and color format is like NV12/yuv420p + */ +bool encode_single_channel_as_color(const std::string & encoding, enum AVPixelFormat fmt); } // namespace utils } // namespace ffmpeg_encoder_decoder #endif // FFMPEG_ENCODER_DECODER__UTILS_HPP_ diff --git a/package.xml b/package.xml index 8a53841..700c71f 100644 --- a/package.xml +++ b/package.xml @@ -14,12 +14,13 @@ cv_bridge ffmpeg - libavdevice-dev + libavdevice-dev libopencv-imgproc-dev rclcpp sensor_msgs std_msgs + ffmpeg_image_transport_msgs ament_lint_auto ament_lint_common ament_cmake_gtest diff --git a/src/decoder.cpp b/src/decoder.cpp index 5458f21..667ec44 100644 --- a/src/decoder.cpp +++ b/src/decoder.cpp @@ -14,6 +14,7 @@ // limitations under the License. #include +#include #include #include #include @@ -23,24 +24,21 @@ namespace ffmpeg_encoder_decoder { -// default mappings -static const std::unordered_map defaultMap{ - {{"h264_nvenc", "h264"}, - {"libx264", "h264"}, - {"hevc_nvenc", "hevc_cuvid"}, - {"h264_nvmpi", "h264"}, - {"h264_vaapi", "h264"}}}; Decoder::Decoder() : logger_(rclcpp::get_logger("Decoder")) {} -Decoder::~Decoder() { reset(); } +Decoder::~Decoder() { resetNoLock(); } void Decoder::reset() +{ + Lock lock(mutex_); + resetNoLock(); +} + +void Decoder::resetNoLock() { if (codecContext_) { avcodec_free_context(&codecContext_); - // avcodec_close(codecContext_); - // av_free(codecContext_); codecContext_ = NULL; } if (swsContext_) { @@ -50,37 +48,48 @@ void Decoder::reset() if (hwDeviceContext_) { av_buffer_unref(&hwDeviceContext_); } - av_free(decodedFrame_); - decodedFrame_ = NULL; + av_free(swFrame_); + swFrame_ = NULL; av_free(cpuFrame_); cpuFrame_ = NULL; - av_free(colorFrame_); - colorFrame_ = NULL; + av_free(outputFrame_); + outputFrame_ = NULL; + hwPixFormat_ = AV_PIX_FMT_NONE; + outputMsgEncoding_ = ""; + packetEncoding_ = ""; } -bool Decoder::initialize(const std::string & encoding, Callback callback, const std::string & dec) +void Decoder::setOutputMessageEncoding(const std::string & output_encoding) { - std::string decoder = dec; - if (decoder.empty()) { - RCLCPP_INFO_STREAM(logger_, "no decoder for encoding: " << encoding); - return (false); - } - callback_ = callback; - encoding_ = encoding; - return (initDecoder(encoding_, decoder)); + Lock lock(mutex_); + RCLCPP_INFO_STREAM(logger_, "forcing output encoding: " << output_encoding); + outputMsgEncoding_ = output_encoding; +} + +static std::vector splitEncoding(const std::string & encoding) +{ + return (utils::split_by_char(encoding, ';')); } -static enum AVHWDeviceType get_hw_type(const std::string & name, rclcpp::Logger logger) +void Decoder::setEncoding(const std::string & encoding) { - enum AVHWDeviceType type = av_hwdevice_find_type_by_name(name.c_str()); - if (type == AV_HWDEVICE_TYPE_NONE) { - RCLCPP_INFO_STREAM(logger, "hw accel device is not supported: " << name); - RCLCPP_INFO_STREAM(logger, "available devices:"); - while ((type = av_hwdevice_iterate_types(type)) != AV_HWDEVICE_TYPE_NONE) - RCLCPP_INFO_STREAM(logger, av_hwdevice_get_type_name(type)); - return (type); + packetEncoding_ = encoding; + const auto split = splitEncoding(encoding); + if (outputMsgEncoding_.empty()) { + // assume orig was bgr8 + outputMsgEncoding_ = split.size() == 4 ? split[3] : "bgr8"; + RCLCPP_INFO_STREAM( + logger_, + "output image encoding: " << outputMsgEncoding_ << ((split.size() == 4) ? "" : " (default)")); } - return (type); +} + +bool Decoder::initialize( + const std::string & encoding, Callback callback, const std::string & decoder) +{ + Lock lock(mutex_); + callback_ = callback; + return (initDecoder(encoding, decoder)); } static AVBufferRef * hw_decoder_init( @@ -88,36 +97,19 @@ static AVBufferRef * hw_decoder_init( { int rc = av_hwdevice_ctx_create(hwDeviceContext, hwType, NULL, NULL, 0); if (rc < 0) { - RCLCPP_ERROR_STREAM(logger, "failed to create context for HW device: " << hwType); + RCLCPP_ERROR_STREAM( + logger, "failed to create context for HW device: " << av_hwdevice_get_type_name(hwType)); return (NULL); } + RCLCPP_INFO_STREAM(logger, "using hardware acceleration: " << av_hwdevice_get_type_name(hwType)); return (av_buffer_ref(*hwDeviceContext)); } -static std::unordered_map pix_format_map; - -static enum AVPixelFormat get_hw_format(AVCodecContext * ctx, const enum AVPixelFormat * pix_fmts) -{ - enum AVPixelFormat pf = pix_format_map[ctx]; - const enum AVPixelFormat * p; - for (p = pix_fmts; *p != -1; p++) { - if (*p == pf) { - return *p; - } - } - std::cerr << "Failed to get HW surface format." << std::endl; - return AV_PIX_FMT_NONE; -} - -static enum AVPixelFormat find_pix_format( - const std::string & codecName, enum AVHWDeviceType hwDevType, const AVCodec * codec, - const std::string & hwAcc, rclcpp::Logger logger) +static enum AVPixelFormat find_pix_format(enum AVHWDeviceType hwDevType, const AVCodec * codec) { for (int i = 0;; i++) { const AVCodecHWConfig * config = avcodec_get_hw_config(codec, i); if (!config) { - RCLCPP_WARN_STREAM( - logger, "decoder " << codecName << " does not support hw accel: " << hwAcc); return (AV_PIX_FMT_NONE); } if ( @@ -129,11 +121,122 @@ static enum AVPixelFormat find_pix_format( return (AV_PIX_FMT_NONE); } +// This function is an adapted version of avcodec_default_get_format() +// +// In the ffmpeg executable, the matching between input and output formats +// when transcoding is a complex process, with filters inserted between +// sink and source that rescale and reformat the images. This get_format() +// function tries to provide some of the functionality without lifting +// large parts of the code base from the ffmpeg tool. + +enum AVPixelFormat get_format(struct AVCodecContext * avctx, const enum AVPixelFormat * fmt) +{ + const AVPixFmtDescriptor * desc; + const AVCodecHWConfig * config; + int i, n; +#ifdef DEBUG_PIXEL_FORMAT + if (avctx->codec) { + printf("codec is: %s\n", avctx->codec->name); + } + char buf[64]; + buf[63] = 0; + for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) { + av_get_pix_fmt_string(buf, sizeof(buf) - 1, fmt[n]); + printf("offered pix fmt: %d = %s\n", fmt[n], buf); + } +#endif + // If a device was supplied when the codec was opened, assume that the + // user wants to use it. + if (avctx->hw_device_ctx && avcodec_get_hw_config(avctx->codec, 0)) { + AVHWDeviceContext * device_ctx = + reinterpret_cast(avctx->hw_device_ctx->data); + for (i = 0;; i++) { + config = avcodec_get_hw_config(avctx->codec, i); + if (!config) break; + if (!(config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) continue; + if (device_ctx->type != config->device_type) continue; + for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) { + if (config->pix_fmt == fmt[n]) { +#ifdef DEBUG_PIXEL_FORMAT + av_get_pix_fmt_string(buf, sizeof(buf) - 1, fmt[n]); + printf("using pix fmt: %d = %s\n", fmt[n], buf); +#endif + return fmt[n]; + } + } + } + } + + for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) { + } + desc = av_pix_fmt_desc_get(fmt[n - 1]); + if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL)) { +#ifdef DEBUG_PIXEL_FORMAT + av_get_pix_fmt_string(buf, sizeof(buf) - 1, fmt[n - 1]); + printf("using unaccelerated last fmt: %d = %s\n", fmt[n - 1], buf); +#endif + return fmt[n - 1]; + } + + // Finally, traverse the list in order and choose the first entry + // with no external dependencies (if there is no hardware configuration + // information available then this just picks the first entry). + for (n = 0; fmt[n] != AV_PIX_FMT_NONE; n++) { + for (i = 0;; i++) { + config = avcodec_get_hw_config(avctx->codec, i); + if (!config) break; + if (config->pix_fmt == fmt[n]) break; + } + if (!config) { + // No specific config available, so the decoder must be able + // to handle this format without any additional setup. +#ifdef DEBUG_PIXEL_FORMAT + av_get_pix_fmt_string(buf, sizeof(buf) - 1, fmt[n]); + printf("handle without setup %d = %s\n", fmt[n], buf); +#endif + return fmt[n]; + } + if (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL) { + // Usable with only internal setup. +#ifdef DEBUG_PIXEL_FORMAT + av_get_pix_fmt_string(buf, sizeof(buf) - 1, fmt[n]); + printf("handle with internal setup %d = %s\n", fmt[n], buf); +#endif + return fmt[n]; + } + } + // Nothing is usable, give up. + return AV_PIX_FMT_NONE; +} + bool Decoder::initDecoder(const std::string & encoding, const std::string & decoder) { + const AVCodec * codec = avcodec_find_decoder_by_name(decoder.c_str()); + if (!codec) { + RCLCPP_WARN_STREAM(logger_, "decoder " << decoder << " cannot decode " << encoding); + return (false); + } + // use the decoder if it either is software, or has working + // hardware support + const AVCodecHWConfig * hwConfig = nullptr; + if (codec->capabilities & AV_CODEC_CAP_HARDWARE) { + hwConfig = avcodec_get_hw_config(codec, 0); + if (!hwConfig) { + RCLCPP_INFO_STREAM(logger_, "ignoring decoder with no hardware config: " << decoder); + return (false); + } + } + if (!doInitDecoder(encoding, decoder)) { + return (false); + } + return (true); +} + +bool Decoder::doInitDecoder(const std::string & encoding, const std::string & decoder) +{ + setEncoding(encoding); try { - const AVCodec * codec = NULL; - codec = avcodec_find_decoder_by_name(decoder.c_str()); + const AVCodec * codec = avcodec_find_decoder_by_name(decoder.c_str()); if (!codec) { RCLCPP_ERROR_STREAM(logger_, "cannot find decoder " << decoder); throw(std::runtime_error("cannot find decoder " + decoder)); @@ -145,55 +248,174 @@ bool Decoder::initDecoder(const std::string & encoding, const std::string & deco throw(std::runtime_error("alloc context failed!")); } av_opt_set_int(codecContext_, "refcounted_frames", 1, 0); - const std::string hwAcc("cuda"); - enum AVHWDeviceType hwDevType = get_hw_type(hwAcc, logger_); + enum AVHWDeviceType hwDevType = AV_HWDEVICE_TYPE_NONE; + if (codec->capabilities & AV_CODEC_CAP_HARDWARE) { + const AVCodecHWConfig * hwConfig = avcodec_get_hw_config(codec, 0); + if (hwConfig) { + hwDevType = hwConfig->device_type; + RCLCPP_INFO_STREAM( + logger_, + "decoder " << decoder << " has hw accelerator: " << av_hwdevice_get_type_name(hwDevType)); + } else { + RCLCPP_WARN_STREAM(logger_, "decoder " << decoder << " does not have hw acceleration!"); + } + } else { + RCLCPP_INFO_STREAM(logger_, "decoder " << decoder << " has no hardware acceleration"); + } // default - hwPixFormat_ = AV_PIX_FMT_NONE; - if (hwDevType != AV_HWDEVICE_TYPE_NONE) { codecContext_->hw_device_ctx = hw_decoder_init(&hwDeviceContext_, hwDevType, logger_); if (codecContext_->hw_device_ctx != NULL) { - hwPixFormat_ = find_pix_format(encoding, hwDevType, codec, hwAcc, logger_); - // must put in global hash for the callback function - pix_format_map[codecContext_] = hwPixFormat_; - codecContext_->get_format = get_hw_format; - } else { // hardware couldn't be initialized. - hwDevType = AV_HWDEVICE_TYPE_NONE; + hwPixFormat_ = find_pix_format(hwDevType, codec); + codecContext_->get_format = get_format; } } codecContext_->pkt_timebase = timeBase_; + std::stringstream ss; + for (const auto & kv : avOptions_) { + setAVOption(kv.first, kv.second); + ss << " " << kv.first << "=" << kv.second; + } + RCLCPP_INFO(logger_, "using decoder %10s with options: %s", decoder.c_str(), ss.str().c_str()); if (avcodec_open2(codecContext_, codec, NULL) < 0) { - RCLCPP_ERROR_STREAM(logger_, "open context failed for " + decoder); av_free(codecContext_); codecContext_ = NULL; codec = NULL; - throw(std::runtime_error("open context failed!")); + hwPixFormat_ = AV_PIX_FMT_NONE; + throw(std::runtime_error("open context failed for " + decoder)); } - decodedFrame_ = av_frame_alloc(); + swFrame_ = av_frame_alloc(); cpuFrame_ = (hwPixFormat_ == AV_PIX_FMT_NONE) ? NULL : av_frame_alloc(); - colorFrame_ = av_frame_alloc(); - colorFrame_->format = AV_PIX_FMT_BGR24; + outputFrame_ = av_frame_alloc(); } catch (const std::runtime_error & e) { RCLCPP_ERROR_STREAM(logger_, e.what()); - reset(); + resetNoLock(); return (false); } RCLCPP_INFO_STREAM(logger_, "decoding with " << decoder); return (true); } +bool Decoder::flush() +{ + Lock lock(mutex_); + if (!codecContext_) { + return (false); + } + int ret = avcodec_send_packet(codecContext_, nullptr); + if (ret != 0) { + RCLCPP_WARN_STREAM(logger_, "failed to send flush packet!"); + return (false); + } + // receive frames until decoder buffer is empty or error occurs. + // this may trigger frame callbacks. + while ((ret = receiveFrame()) == 0) { + // will return 0, EAGAIN, EOF, or other error + } + if (ret != AVERROR_EOF) { + RCLCPP_WARN_STREAM(logger_, "decoder flush failed with error"); + return (false); + } + return (true); +} + +int Decoder::receiveFrame() +{ + auto & ctx = codecContext_; // shorthand + const int ret = avcodec_receive_frame(ctx, swFrame_); + if (ret != 0) { + return (ret); + } + const bool isAcc = swFrame_->format == hwPixFormat_; + if (isAcc) { + int ret2 = av_hwframe_transfer_data(cpuFrame_, swFrame_, 0); + if (ret2 < 0) { + RCLCPP_WARN_STREAM( + logger_, "hardware frame transfer failed for pixel format " << utils::pix(hwPixFormat_)); + return (AVERROR_INVALIDDATA); + } + } + AVFrame * frame = isAcc ? cpuFrame_ : swFrame_; + if (frame->width == ctx->width && frame->height == ctx->height) { + // prepare the decoded message + ImagePtr image(new Image()); + image->height = frame->height; + image->width = frame->width; + image->step = (sensor_msgs::image_encodings::bitDepth(outputMsgEncoding_) / 8) * image->width * + sensor_msgs::image_encodings::numChannels(outputMsgEncoding_); + image->encoding = outputMsgEncoding_; + const int retc = convertFrameToMessage(frame, image); + if (retc != 0) { + return (retc); + } + + auto it = ptsToStamp_.find(swFrame_->pts); + if (it == ptsToStamp_.end()) { + RCLCPP_ERROR_STREAM(logger_, "cannot find pts that matches " << swFrame_->pts); + } else { + image->header.frame_id = it->second.frame_id; + image->header.stamp = it->second.time; + ptsToStamp_.erase(it); +#ifdef USE_AV_FLAGS + callback_(image, swFrame_->flags, utils::pix(static_cast(frame->format))); +#else + callback_(image, swFrame_->key_frame, utils::pix(static_cast(frame->format))); +#endif + } + } + return (ret); +} + +int Decoder::convertFrameToMessage(const AVFrame * frame, const ImagePtr & image) +{ + const auto srcFmt = static_cast(frame->format); + const bool encAsColor = utils::encode_single_channel_as_color(image->encoding, srcFmt); + if (!swsContext_) { // initialize reformatting context if first time + // If encode-mono-as-color hack has been used, leave the pixel + // format as is, and later simply crop away the bottom 1/3 of image. + outputFrame_->format = encAsColor ? srcFmt : utils::ros_to_av_pix_format(outputMsgEncoding_); + swsContext_ = sws_getContext( + frame->width, frame->height, srcFmt, // src + frame->width, frame->height, static_cast(outputFrame_->format), // dest + SWS_FAST_BILINEAR | SWS_ACCURATE_RND, NULL, NULL, NULL); + if (!swsContext_) { + RCLCPP_ERROR(logger_, "cannot allocate sws context!!!!"); + return (AVERROR_BUFFER_TOO_SMALL); + } + } + + image->data.resize( + encAsColor ? ((image->step * image->height * 3) / 2) : (image->step * image->height)); + // bend the memory pointers in outputFrame_ to the right locations + av_image_fill_arrays( + outputFrame_->data, outputFrame_->linesize, &(image->data[0]), + static_cast(outputFrame_->format), frame->width, frame->height, 1); + sws_scale( + swsContext_, frame->data, frame->linesize, 0, // src + frame->height, outputFrame_->data, outputFrame_->linesize); // dest + + // now resize in case we encoded single-channel as colorz + image->data.resize(image->step * image->height); + return (0); +} + bool Decoder::decodePacket( const std::string & encoding, const uint8_t * data, size_t size, uint64_t pts, const std::string & frame_id, const rclcpp::Time & stamp) { + Lock lock(mutex_); + if (codecContext_ == nullptr) { + RCLCPP_ERROR_STREAM(logger_, "decoder is not initialized!"); + return (false); + } rclcpp::Time t0; if (measurePerformance_) { t0 = rclcpp::Clock().now(); } - if (encoding != encoding_) { + if (encoding != packetEncoding_) { RCLCPP_ERROR_STREAM( - logger_, "no on-the fly encoding change from " << encoding_ << " to " << encoding); + logger_, "no on-the fly encoding change from " << packetEncoding_ << " to " << encoding); return (false); } AVCodecContext * ctx = codecContext_; @@ -202,65 +424,20 @@ bool Decoder::decodePacket( memcpy(packet->data, data, size); packet->pts = pts; packet->dts = packet->pts; - ptsToStamp_[packet->pts] = stamp; + + ptsToStamp_.insert(PTSMap::value_type(packet->pts, {stamp, frame_id})); + int ret = avcodec_send_packet(ctx, packet); if (ret != 0) { RCLCPP_WARN_STREAM(logger_, "send_packet failed for pts: " << pts); av_packet_unref(packet); return (false); } - ret = avcodec_receive_frame(ctx, decodedFrame_); - const bool isAcc = (ret == 0) && (decodedFrame_->format == hwPixFormat_); - if (isAcc) { - ret = av_hwframe_transfer_data(cpuFrame_, decodedFrame_, 0); - if (ret < 0) { - RCLCPP_WARN_STREAM(logger_, "failed to transfer data from GPU->CPU"); - av_packet_unref(packet); - return (false); - } - } - AVFrame * frame = isAcc ? cpuFrame_ : decodedFrame_; - - if (ret == 0 && frame->width != 0) { - // convert image to something palatable - if (!swsContext_) { - swsContext_ = sws_getContext( - ctx->width, ctx->height, (AVPixelFormat)frame->format, // src - ctx->width, ctx->height, (AVPixelFormat)colorFrame_->format, // dest - SWS_FAST_BILINEAR | SWS_ACCURATE_RND, NULL, NULL, NULL); - if (!swsContext_) { - RCLCPP_ERROR(logger_, "cannot allocate sws context!!!!"); - return (false); - } - } - // prepare the decoded message - ImagePtr image(new Image()); - image->height = frame->height; - image->width = frame->width; - image->step = image->width * 3; // 3 bytes per pixel - image->encoding = sensor_msgs::image_encodings::BGR8; - image->data.resize(image->step * image->height); - - // bend the memory pointers in colorFrame to the right locations - av_image_fill_arrays( - colorFrame_->data, colorFrame_->linesize, &(image->data[0]), - (AVPixelFormat)colorFrame_->format, frame->width, frame->height, 1); - sws_scale( - swsContext_, frame->data, frame->linesize, 0, // src - ctx->height, colorFrame_->data, colorFrame_->linesize); // dest - auto it = ptsToStamp_.find(decodedFrame_->pts); - if (it == ptsToStamp_.end()) { - RCLCPP_ERROR_STREAM(logger_, "cannot find pts that matches " << decodedFrame_->pts); - } else { - image->header.frame_id = frame_id; - image->header.stamp = it->second; - ptsToStamp_.erase(it); -#ifdef USE_AV_FLAGS - callback_(image, decodedFrame_->flags || AV_FRAME_FLAG_KEY); // deliver callback -#else - callback_(image, decodedFrame_->key_frame); // deliver callback -#endif - } + int rret{0}; + // return value of 0 means got frame, + // EAGAIN means there are no more frames + while ((rret = receiveFrame()) == 0) { + // keep polling for frames as long as there are any } av_packet_unref(packet); av_packet_free(&packet); @@ -269,18 +446,55 @@ bool Decoder::decodePacket( double dt = (t1 - t0).seconds(); tdiffTotal_.update(dt); } - return (true); + return (rret == AVERROR(EAGAIN)); } -void Decoder::resetTimers() { tdiffTotal_.reset(); } +void Decoder::resetTimers() +{ + Lock lock(mutex_); + tdiffTotal_.reset(); +} void Decoder::printTimers(const std::string & prefix) const { + Lock lock(mutex_); RCLCPP_INFO_STREAM(logger_, prefix << " total decode: " << tdiffTotal_); } +void Decoder::setAVOption(const std::string & field, const std::string & value) +{ + if (!value.empty() && codecContext_ && codecContext_->priv_data) { + const int err = + av_opt_set(codecContext_->priv_data, field.c_str(), value.c_str(), AV_OPT_SEARCH_CHILDREN); + if (err != 0) { + RCLCPP_ERROR_STREAM( + logger_, "cannot set option " << field << " to value " << value << ": " << utils::err(err)); + } + } +} + +void Decoder::findDecoders( + const std::string & codec, std::vector * hw_decoders, + std::vector * sw_decoders) +{ + utils::find_decoders(codec, hw_decoders, sw_decoders); +} + +std::string Decoder::findDecoders(const std::string & codec) +{ + return (utils::find_decoders(codec)); +} + +// -------------- deprecated, DO NOT USE ------------------ const std::unordered_map & Decoder::getDefaultEncoderToDecoderMap() { + static const std::unordered_map defaultMap{ + {{"h264_nvenc", "h264"}, + {"libx264", "h264"}, + {"hevc_nvenc", "hevc_cuvid"}, + {"h264_nvmpi", "h264"}, + {"h264_vaapi", "h264"}}}; return (defaultMap); } + } // namespace ffmpeg_encoder_decoder diff --git a/src/encoder.cpp b/src/encoder.cpp index ed6ca85..80f1de4 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -43,6 +43,15 @@ void Encoder::reset() closeCodec(); } +void Encoder::setEncoder(const std::string & n) +{ + Lock lock(mutex_); + encoder_ = n; + codec_ = utils::find_codec(encoder_); +} + +std::string Encoder::findCodec(const std::string & encoder) { return (utils::find_codec(encoder)); } + static void free_frame(AVFrame ** frame) { if (*frame) { @@ -87,40 +96,57 @@ AVPixelFormat Encoder::pixelFormat(const std::string & f) const return (fmt); } -bool Encoder::initialize(int width, int height, Callback callback) +bool Encoder::initialize(int width, int height, Callback callback, const std::string & encoding) { Lock lock(mutex_); callback_ = callback; - return (openCodec(width, height)); + return (openCodec(width, height, encoding)); } -void Encoder::openVAAPIDevice(const AVCodec * codec, int width, int height) +void Encoder::openHardwareDevice( + const AVCodec * codec, enum AVHWDeviceType hwDevType, int width, int height) { int err = 0; - err = av_hwdevice_ctx_create(&hwDeviceContext_, AV_HWDEVICE_TYPE_VAAPI, NULL, NULL, 0); + err = av_hwdevice_ctx_create(&hwDeviceContext_, hwDevType, NULL, NULL, 0); utils::check_for_err("cannot create hw device context", err); - AVBufferRef * hw_frames_ref = av_hwframe_ctx_alloc(hwDeviceContext_); - if (!hw_frames_ref) { - throw std::runtime_error("cannot allocate hw device!"); + + AVBufferRef * hwframe_ctx_ref = av_hwframe_ctx_alloc(hwDeviceContext_); + if (!hwframe_ctx_ref) { + av_buffer_unref(&hwDeviceContext_); + hwDeviceContext_ = 0; + throw std::runtime_error("cannot allocate hwframe context!"); + } + const auto hwframe_transfer_formats = + utils::get_hwframe_transfer_formats(hwframe_ctx_ref, AV_HWFRAME_TRANSFER_DIRECTION_TO); + + if (hwframe_transfer_formats.empty()) { + // Bernd: Apparently NVENC does not need a device to be opened at all. + // But how to tell if a device should be opened in general? + // Checking if the list of TO transfer formats is empty() works to tell + // NVENC (no device required) from VAAPI (device required). + // This test may be broken in general! + av_buffer_unref(&hwDeviceContext_); + hwDeviceContext_ = 0; + av_buffer_unref(&hwframe_ctx_ref); + RCLCPP_INFO_STREAM(logger_, "no need to open device for codec " << codec->name); + return; } - AVHWFramesContext * frames_ctx = reinterpret_cast(hw_frames_ref->data); - frames_ctx->format = utils::find_hw_config(&usesHardwareFrames_, AV_HWDEVICE_TYPE_VAAPI, codec); + // cast the reference to a concrete pointer + AVHWFramesContext * frames_ctx = reinterpret_cast(hwframe_ctx_ref->data); + frames_ctx->format = utils::find_hw_config(&usesHardwareFrames_, hwDevType, codec); if (usesHardwareFrames_) { - const auto fmts = utils::get_hwframe_transfer_formats(hw_frames_ref); - frames_ctx->sw_format = utils::get_preferred_pixel_format("h264_vaapi", fmts); + frames_ctx->sw_format = + utils::get_preferred_pixel_format(usesHardwareFrames_, hwframe_transfer_formats); if (pixFormat_ != AV_PIX_FMT_NONE) { RCLCPP_INFO_STREAM( logger_, "user overriding software pix fmt " << utils::pix(frames_ctx->sw_format)); RCLCPP_INFO_STREAM(logger_, "with " << utils::pix(pixFormat_)); frames_ctx->sw_format = pixFormat_; // override default at your own risk! - } else { - RCLCPP_INFO_STREAM( - logger_, "using software pixel format: " << utils::pix(frames_ctx->sw_format)); } if (frames_ctx->sw_format == AV_PIX_FMT_NONE) { - av_buffer_unref(&hw_frames_ref); + av_buffer_unref(&hwframe_ctx_ref); throw(std::runtime_error("cannot find valid sw pixel format!")); } } @@ -128,23 +154,22 @@ void Encoder::openVAAPIDevice(const AVCodec * codec, int width, int height) frames_ctx->width = width; frames_ctx->height = height; frames_ctx->initial_pool_size = 20; - if ((err = av_hwframe_ctx_init(hw_frames_ref)) < 0) { - av_buffer_unref(&hw_frames_ref); - utils::throw_err("failed to initialize VAAPI frame context", err); + if ((err = av_hwframe_ctx_init(hwframe_ctx_ref)) < 0) { + av_buffer_unref(&hwframe_ctx_ref); + utils::throw_err("failed to initialize hardware frame context", err); } - codecContext_->hw_frames_ctx = av_buffer_ref(hw_frames_ref); - - av_buffer_unref(&hw_frames_ref); + codecContext_->hw_frames_ctx = av_buffer_ref(hwframe_ctx_ref); + av_buffer_unref(&hwframe_ctx_ref); if (codecContext_->hw_frames_ctx == nullptr) { - throw(std::runtime_error("vaapi: cannot create buffer ref!")); + throw(std::runtime_error("hardware decoder: cannot create buffer ref!")); } } -bool Encoder::openCodec(int width, int height) +bool Encoder::openCodec(int width, int height, const std::string & encoding) { try { - doOpenCodec(width, height); + doOpenCodec(width, height, encoding); } catch (const std::runtime_error & e) { RCLCPP_ERROR_STREAM(logger_, e.what()); closeCodec(); @@ -155,9 +180,25 @@ bool Encoder::openCodec(int width, int height) return (true); } -void Encoder::doOpenCodec(int width, int height) +enum AVPixelFormat Encoder::findMatchingSourceFormat( + const std::string & rosSrcFormat, enum AVPixelFormat targetFormat) +{ + const auto targetFormatDesc = av_pix_fmt_desc_get(targetFormat); + if (!targetFormatDesc) { + RCLCPP_ERROR_STREAM(logger_, "invalid libav target format: " << static_cast(targetFormat)); + throw(std::runtime_error("invalid libav target format")); + } + // special hack to encode single-channel images as nv12/yuv420p etc + if (utils::encode_single_channel_as_color(rosSrcFormat, targetFormat)) { + return (targetFormat); + } + return (utils::ros_to_av_pix_format(rosSrcFormat)); +} + +void Encoder::doOpenCodec(int width, int height, const std::string & origEncoding) { int err = 0; + codecContext_ = nullptr; if (encoder_.empty()) { throw(std::runtime_error("no codec set!")); @@ -183,17 +224,20 @@ void Encoder::doOpenCodec(int width, int height) auto pixFmts = utils::get_encoder_formats(codecContext_, codec); - codecContext_->bit_rate = bitRate_; - codecContext_->qmax = qmax_; // 0: highest, 63: worst quality bound codecContext_->width = width; codecContext_->height = height; codecContext_->time_base = timeBase_; codecContext_->framerate = frameRate_; + codecContext_->bit_rate = bitRate_; + codecContext_->qmax = qmax_; // 0: highest, 63: worst quality bound codecContext_->gop_size = GOPSize_; - codecContext_->max_b_frames = 0; // nvenc can only handle zero! + codecContext_->max_b_frames = maxBFrames_; // nvenc can only handle zero! - if (encoder_.find("vaapi") != std::string::npos) { - openVAAPIDevice(codec, width, height); + const enum AVHWDeviceType hwDevType = utils::find_hw_device_type(codec); + if (hwDevType != AV_HWDEVICE_TYPE_NONE) { + RCLCPP_INFO_STREAM( + logger_, encoder_ << " uses hw device: " << av_hwdevice_get_type_name(hwDevType)); + openHardwareDevice(codec, hwDevType, width, height); } if (usesHardwareFrames_) { @@ -202,23 +246,31 @@ void Encoder::doOpenCodec(int width, int height) codecContext_->sw_pix_fmt = frames_ctx->sw_format; codecContext_->pix_fmt = frames_ctx->format; } else { - codecContext_->pix_fmt = utils::get_preferred_pixel_format(encoder_, pixFmts); + codecContext_->pix_fmt = (pixFormat_ != AV_PIX_FMT_NONE) + ? pixFormat_ + : utils::get_preferred_pixel_format(usesHardwareFrames_, pixFmts); codecContext_->sw_pix_fmt = codecContext_->pix_fmt; } + avSourcePixelFormat_ = utils::pix(codecContext_->sw_pix_fmt); + RCLCPP_INFO_STREAM(logger_, "using av_source_pixel_format: " << avSourcePixelFormat_); - setAVOption("profile", profile_); - setAVOption("preset", preset_); - setAVOption("tune", tune_); - setAVOption("delay", delay_); - setAVOption("crf", crf_); - RCLCPP_DEBUG( - logger_, - "codec: %10s, profile: %10s, preset: %10s," - " bit_rate: %10ld qmax: %2d", - encoder_.c_str(), profile_.c_str(), preset_.c_str(), bitRate_, qmax_); + std::stringstream ss; + for (const auto & kv : avOptions_) { + setAVOption(kv.first, kv.second); + ss << " " << kv.first << "=" << kv.second; + } + RCLCPP_INFO( + logger_, "codec: %10s, bit_rate: %10ld qmax: %2d options: %s", encoder_.c_str(), bitRate_, + qmax_, ss.str().c_str()); + RCLCPP_INFO_STREAM( + logger_, "cv_bridge_target_format: " + << cvBridgeTargetFormat_ + << " libav: " << utils::pix(utils::ros_to_av_pix_format(cvBridgeTargetFormat_))); + RCLCPP_INFO_STREAM(logger_, "av_source_pixel_format: " << utils::pix(codecContext_->sw_pix_fmt)); + RCLCPP_INFO_STREAM(logger_, "encoder (hw) format: " << utils::pix(codecContext_->pix_fmt)); err = avcodec_open2(codecContext_, codec, NULL); - utils::check_for_err("cannot open codec", err); + utils::check_for_err("cannot open codec " + encoder_, err); RCLCPP_INFO_STREAM(logger_, "opened codec: " << encoder_); frame_ = av_frame_alloc(); @@ -252,27 +304,30 @@ void Encoder::doOpenCodec(int width, int height) packet_->data = NULL; packet_->size = 0; - // create (src) frame that wraps the received uncompressed image + // create (src) frame that wraps the received raw image wrapperFrame_ = av_frame_alloc(); wrapperFrame_->width = width; wrapperFrame_->height = height; - wrapperFrame_->format = AV_PIX_FMT_BGR24; + wrapperFrame_->format = findMatchingSourceFormat( + cvBridgeTargetFormat_, static_cast(frame_->format)); // initialize format conversion library if (!swsContext_) { swsContext_ = sws_getContext( - width, height, AV_PIX_FMT_BGR24, // src - width, height, static_cast(frame_->format), // dest + width, height, static_cast(wrapperFrame_->format), // src + width, height, static_cast(frame_->format), // dest SWS_FAST_BILINEAR | SWS_ACCURATE_RND, NULL, NULL, NULL); if (!swsContext_) { throw(std::runtime_error("cannot allocate sws context")); } } + encoding_ = codec_ + ";" + avSourcePixelFormat_ + ";" + cvBridgeTargetFormat_ + ";" + + (origEncoding.empty() ? cvBridgeTargetFormat_ : origEncoding); } void Encoder::setAVOption(const std::string & field, const std::string & value) { - if (!value.empty()) { + if (!value.empty() && codecContext_ && codecContext_->priv_data) { const int err = av_opt_set(codecContext_->priv_data, field.c_str(), value.c_str(), AV_OPT_SEARCH_CHILDREN); if (err != 0) { @@ -288,11 +343,23 @@ void Encoder::encodeImage(const Image & msg) if (measurePerformance_) { t0 = rclcpp::Clock().now(); } - // TODO(Bernd): forcing the encoding to be BGR8 is wasteful when - // the encoder supports monochrome. - - cv::Mat img = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8)->image; - encodeImage(img, msg.header, t0); + try { + if (utils::encode_single_channel_as_color( + cvBridgeTargetFormat_, static_cast(wrapperFrame_->format))) { + // hack to encode single-channel ros formats as nv12/yuv420p etc + cv::Mat img( + msg.height, msg.width, cv::DataType::type, const_cast(msg.data.data()), + msg.step); + // Add empty color channels to the bottom of matrix + img.push_back(cv::Mat::zeros(msg.height / 2, msg.width, cv::DataType::type)); + doEncodeImage(img, msg.header, t0); + } else { + cv::Mat img = cv_bridge::toCvCopy(msg, cvBridgeTargetFormat_)->image; + doEncodeImage(img, msg.header, t0); + } + } catch (const cv_bridge::Exception & e) { + RCLCPP_ERROR_STREAM(logger_, "cv_bridge convert failed: " << e.what()); + } if (measurePerformance_) { const auto t1 = rclcpp::Clock().now(); tdiffDebayer_.update((t1 - t0).seconds()); @@ -300,6 +367,11 @@ void Encoder::encodeImage(const Image & msg) } void Encoder::encodeImage(const cv::Mat & img, const Header & header, const rclcpp::Time & t0) +{ + doEncodeImage(img, header, t0); +} + +void Encoder::doEncodeImage(const cv::Mat & img, const Header & header, const rclcpp::Time & t0) { Lock lock(mutex_); rclcpp::Time t1, t2, t3; @@ -323,7 +395,7 @@ void Encoder::encodeImage(const cv::Mat & img, const Header & header, const rclc } frame_->pts = pts_++; // - ptsToStamp_.insert(PTSMap::value_type(frame_->pts, header.stamp)); + ptsToStamp_.insert(PTSMap::value_type(frame_->pts, {header.stamp, header.frame_id})); int ret; if (usesHardwareFrames_) { @@ -333,14 +405,13 @@ void Encoder::encodeImage(const cv::Mat & img, const Header & header, const rclc } ret = avcodec_send_frame(codecContext_, usesHardwareFrames_ ? hw_frame_ : frame_); - if (measurePerformance_) { t3 = rclcpp::Clock().now(); tdiffSendFrame_.update((t3 - t2).seconds()); } // now drain all packets while (ret == 0) { - ret = drainPacket(header, img.cols, img.rows); + ret = drainPacket(img.cols, img.rows); } if (measurePerformance_) { const rclcpp::Time t4 = rclcpp::Clock().now(); @@ -348,18 +419,20 @@ void Encoder::encodeImage(const cv::Mat & img, const Header & header, const rclc } } -void Encoder::flush(const Header & header) +void Encoder::flush() { if (!frame_) { return; } int ret = avcodec_send_frame(codecContext_, nullptr); while (ret == 0) { - ret = drainPacket(header, frame_->width, frame_->height); + ret = drainPacket(frame_->width, frame_->height); } } -int Encoder::drainPacket(const Header & header, int width, int height) +void Encoder::flush(const Header &) { flush(); } + +int Encoder::drainPacket(int width, int height) { rclcpp::Time t0, t1, t2; if (measurePerformance_) { @@ -379,8 +452,8 @@ int Encoder::drainPacket(const Header & header, int width, int height) } auto it = ptsToStamp_.find(pk.pts); if (it != ptsToStamp_.end()) { - callback_( - header.frame_id, it->second, encoder_, width, height, pk.pts, pk.flags, pk.data, pk.size); + const auto & pe = it->second; + callback_(pe.frame_id, pe.time, encoding_, width, height, pk.pts, pk.flags, pk.data, pk.size); if (measurePerformance_) { const auto t3 = rclcpp::Clock().now(); tdiffPublish_.update((t3 - t2).seconds()); diff --git a/src/utils.cpp b/src/utils.cpp index 05f6779..4c5515d 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -16,8 +16,13 @@ #include #include #include +#include #include #include +#include +#include +#include +#include extern "C" { #include @@ -31,10 +36,25 @@ namespace ffmpeg_encoder_decoder namespace utils { std::string pix(AVPixelFormat const & f) +{ + if (f == AV_PIX_FMT_NONE) { + return (std::string("NONE")); + } + const char * name = av_get_pix_fmt_name(f); + if (!name) { + return (std::string("UNKNOWN")); + } + return (std::string(name)); +} + +std::string pix_long(AVPixelFormat const & f) { char buf[64]; buf[63] = 0; av_get_pix_fmt_string(buf, sizeof(buf) - 1, f); + if (f == AV_PIX_FMT_NONE) { + return (std::string("NONE")); + } return (std::string(buf)); } @@ -57,6 +77,17 @@ void check_for_err(const std::string & msg, int errnum) } } +enum AVHWDeviceType find_hw_device_type(const AVCodec * codec) +{ + if (codec->capabilities & AV_CODEC_CAP_HARDWARE) { + const AVCodecHWConfig * config = avcodec_get_hw_config(codec, 0); + if (config) { + return (config->device_type); + } + } + return (AV_HWDEVICE_TYPE_NONE); +} + enum AVPixelFormat find_hw_config( bool * usesHWFrames, enum AVHWDeviceType hwDevType, const AVCodec * codec) { @@ -83,11 +114,20 @@ static bool has_format(const std::vector & fmts, const AVPixelFor } enum AVPixelFormat get_preferred_pixel_format( - const std::string & encoder, const std::vector & fmts) + bool useHWFormat, const std::vector & fmts) { - // the only format that worked for vaapi was NV12 - if (encoder.find("vaapi") != std::string::npos) { - return (has_format(fmts, AV_PIX_FMT_NV12) ? AV_PIX_FMT_NV12 : AV_PIX_FMT_NONE); + if (useHWFormat) { + // the hardware encoders typically use nv12. + if (has_format(fmts, AV_PIX_FMT_NV12)) { + return (AV_PIX_FMT_NV12); + } + if (has_format(fmts, AV_PIX_FMT_YUV420P)) { + return (AV_PIX_FMT_YUV420P); + } + if (fmts.size() > 0) { + return (fmts[0]); + } + return (AV_PIX_FMT_NONE); } if (has_format(fmts, AV_PIX_FMT_BGR24)) { return (AV_PIX_FMT_BGR24); // fastest, needs no copy @@ -101,7 +141,8 @@ enum AVPixelFormat get_preferred_pixel_format( return (AV_PIX_FMT_NONE); } -std::vector get_encoder_formats(AVCodecContext * context, const AVCodec * c) +std::vector get_encoder_formats( + const AVCodecContext * context, const AVCodec * c) { std::vector formats; if (c) { @@ -122,19 +163,202 @@ std::vector get_encoder_formats(AVCodecContext * context, co return (formats); } -std::vector get_hwframe_transfer_formats(AVBufferRef * hwframe_ctx) +std::vector get_hwframe_transfer_formats( + AVBufferRef * hwframe_ctx, enum AVHWFrameTransferDirection direction) { std::vector formats; AVPixelFormat * fmts{nullptr}; - int ret = - av_hwframe_transfer_get_formats(hwframe_ctx, AV_HWFRAME_TRANSFER_DIRECTION_FROM, &fmts, 0); + int ret = av_hwframe_transfer_get_formats(hwframe_ctx, direction, &fmts, 0); if (ret >= 0) { for (const auto * f = fmts; *f != AV_PIX_FMT_NONE; f++) { formats.push_back(*f); } } + if (fmts) { + av_free(fmts); + } return (formats); } +static const std::unordered_map ros_to_av_pix_map = { + {"bayer_rggb8", AV_PIX_FMT_BAYER_RGGB8}, + {"bayer_bggr8", AV_PIX_FMT_BAYER_BGGR8}, + {"bayer_gbrg8", AV_PIX_FMT_BAYER_GBRG8}, + {"bayer_grbg8", AV_PIX_FMT_BAYER_GRBG8}, + {"bayer_rggb16", AV_PIX_FMT_BAYER_RGGB16LE}, // map to little endian :( + {"bayer_bggr16", AV_PIX_FMT_BAYER_BGGR16LE}, + {"bayer_gbrg16", AV_PIX_FMT_BAYER_GBRG16LE}, + {"bayer_grbg16", AV_PIX_FMT_BAYER_GRBG16LE}, + {"rgb8", AV_PIX_FMT_RGB24}, + {"rgba8", AV_PIX_FMT_RGBA}, + {"rgb16", AV_PIX_FMT_RGB48LE}, + {"rgba16", AV_PIX_FMT_RGBA64LE}, + {"bgr8", AV_PIX_FMT_BGR24}, + {"bgra8", AV_PIX_FMT_BGRA}, + {"bgr16", AV_PIX_FMT_BGR48LE}, + {"bgra16", AV_PIX_FMT_BGRA64LE}, + {"mono8", AV_PIX_FMT_GRAY8}, + {"mono16", AV_PIX_FMT_GRAY16LE}, + {"yuv422", AV_PIX_FMT_YUV422P}, // deprecated, not sure correct + {"uyvy", AV_PIX_FMT_UYVY422}, // not sure that is correct + {"yuyv", AV_PIX_FMT_YUYV422}, // not sure that is correct + {"yuv422_yuy2", AV_PIX_FMT_YUV422P16LE}, // deprecated, probably wrong + {"nv21", AV_PIX_FMT_NV21}, + {"nv24", AV_PIX_FMT_NV24}, + {"nv12", AV_PIX_FMT_NV12} // not an official ROS encoding!! +}; + +enum AVPixelFormat ros_to_av_pix_format(const std::string & ros_pix_fmt) +{ + const auto it = ros_to_av_pix_map.find(ros_pix_fmt); + if (it == ros_to_av_pix_map.end()) { + RCLCPP_ERROR_STREAM( + rclcpp::get_logger("encoder"), "no AV pixel format known for ros format " << ros_pix_fmt); + throw(std::runtime_error("no matching pixel format found for: " + ros_pix_fmt)); + } + return (it->second); +} + +// find codec by name +static const AVCodec * find_by_name(const std::string & name) +{ + const AVCodec * c; + void * iter = nullptr; + while ((c = av_codec_iterate(&iter))) { + if (c->name == name) { + return (c); + } + } + return (nullptr); +} + +static void find_decoders( + AVCodecID encoding, std::vector * decoders, bool with_hw_support) +{ + const AVCodec * c; + void * iter = nullptr; + while ((c = av_codec_iterate(&iter))) { + if (av_codec_is_decoder(c) && c->id == encoding) { + if (with_hw_support) { + if ((c->capabilities & AV_CODEC_CAP_HARDWARE) && (avcodec_get_hw_config(c, 0) != nullptr)) { + decoders->push_back(c->name); + } + } else { + if (!(c->capabilities & AV_CODEC_CAP_HARDWARE)) { + decoders->push_back(c->name); + } + } + } + } +} + +std::vector split_by_char(const std::string & str_list, const char sep) +{ + std::stringstream ss(str_list); + std::vector split; + for (std::string s; ss.good();) { + getline(ss, s, sep); + if (!s.empty()) { + split.push_back(s); + } + } + return (split); +} + +// This function finds the encoding that is the target of a given encoder. + +static AVCodecID find_id_for_encoder_or_encoding(const std::string & encoder) +{ + // first look for encoder with that name + const AVCodec * c = find_by_name(encoder); + if (!c) { + const AVCodecDescriptor * desc = NULL; + while ((desc = avcodec_descriptor_next(desc)) != nullptr) { + if (desc->name == encoder) { + return (desc->id); + } + } + throw(std::runtime_error("unknown encoder/encoding: " + encoder)); + } + return (c->id); +} + +void find_decoders( + const std::string & encoding, std::vector * hw_decoders, + std::vector * sw_decoders) +{ + // in case the passed in encoding is actually an encoder... + const auto real_encoding = find_id_for_encoder_or_encoding(encoding); + // first use hw accelerated codecs, then software + find_decoders(real_encoding, hw_decoders, true); + find_decoders(real_encoding, sw_decoders, false); +} + +std::string find_decoders(const std::string & codec) +{ + std::string decoders; + std::vector hw_dec; + std::vector sw_dec; + find_decoders(codec, &hw_dec, &sw_dec); + for (const auto & s : hw_dec) { + decoders += (decoders.empty() ? "" : ",") + s; + } + for (const auto & s : sw_dec) { + decoders += (decoders.empty() ? "" : ",") + s; + } + return decoders; +} + +std::string filter_decoders(const std::string & codec, const std::string & decoders) +{ + std::string filtered; + const auto valid_decoders = split_by_char(find_decoders(codec), ','); + for (const auto & dec : split_by_char(decoders, ',')) { + for (const auto & valid : valid_decoders) { + if (dec == valid) { + filtered += (filtered.empty() ? "" : ",") + dec; + } + } + } + return (filtered); +} + +std::string find_codec(const std::string & encoder) +{ + const AVCodec * c = find_by_name(encoder); + if (!c) { + throw(std::runtime_error("unknown encoder: " + encoder)); + } + const AVCodecDescriptor * desc = NULL; + while ((desc = avcodec_descriptor_next(desc)) != nullptr) { + if (desc->id == c->id) { + return (desc->name); + } + } + throw(std::runtime_error("weird ffmpeg config error???")); +} + +static bool isNV12LikeFormat(enum AVPixelFormat fmt) +{ + // any format where the top of the image is a full-resolution + // luminance image, and the bottom 1/3 holds the color channels + // should work + return (fmt == AV_PIX_FMT_NV12 || fmt == AV_PIX_FMT_YUV420P); +} + +bool encode_single_channel_as_color(const std::string & encoding, enum AVPixelFormat fmt) +{ + return (sensor_msgs::image_encodings::numChannels(encoding) == 1 && isNV12LikeFormat(fmt)); +} + +std::vector get_hwdevice_types() +{ + std::vector types; + for (enum AVHWDeviceType type = av_hwdevice_iterate_types(AV_HWDEVICE_TYPE_NONE); + type != AV_HWDEVICE_TYPE_NONE; type = av_hwdevice_iterate_types(type)) { + types.push_back(av_hwdevice_get_type_name(type)); + } + return (types); +} } // namespace utils } // namespace ffmpeg_encoder_decoder diff --git a/test/encoder_decoder_test.cpp b/test/encoder_decoder_test.cpp new file mode 100644 index 0000000..bf525b9 --- /dev/null +++ b/test/encoder_decoder_test.cpp @@ -0,0 +1,89 @@ +// -*-c++-*--------------------------------------------------------------------------------------- +// Copyright 2025 Bernd Pfrommer +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "encoder_decoder_tester.hpp" + +TEST(ffmpeg_encoder_decoder, encoder_decoder_bgr8) +{ + const std::string orig_enc = "bgr8"; + EncoderDecoderTester tester; + // encoding options + tester.setOriginalROSEncoding(orig_enc); + tester.setEncoder("libx264rgb"); + tester.setCVBridgeTargetFormat(orig_enc); + // encoder only supports bgr0, bgr24, rgb24 + // tester.setAVSourcePixelFormat("bgr24"); // automatically picked + tester.addAVOption("crf", "0"); // needed for lossless + // decoding options + tester.setExpectedPacketEncoding("h264;bgr24;" + orig_enc + ";" + orig_enc); + tester.setFinalROSEncoding(orig_enc); + tester.setDecoder("h264"); // codec == encoder in this case + tester.setDecoderExpectedAVPixFmt("gbrp"); // picked by decoder + tester.runTest(); + tester.check(); +} + +TEST(ffmpeg_encoder_decoder, encoder_decoder_mono) +{ + const std::string orig_enc = "mono8"; + EncoderDecoderTester tester; + // encoding options + tester.setOriginalROSEncoding(orig_enc); + tester.setEncoder("libx265"); + tester.setCVBridgeTargetFormat(orig_enc); + tester.setAVSourcePixelFormat("gray"); + tester.addAVOption("x265-params", "lossless=1"); + tester.setExpectedPacketEncoding("hevc;gray;" + orig_enc + ";" + orig_enc); + tester.addAVOption("crf", "0"); // may not be needed for lossless + // decoding options + tester.setFinalROSEncoding(orig_enc); + tester.setDecoder("hevc"); + tester.setDecoderExpectedAVPixFmt("gray"); + tester.runTest(); + tester.check(); +} + +TEST(ffmpeg_encoder_decoder, encoder_decoder_bayer) +{ + // tests the special hacks required for bayer encoding + const std::string orig_enc = "bayer_rggb8"; + EncoderDecoderTester tester; + // --- encoding options + tester.setOriginalROSEncoding(orig_enc); + tester.setEncoder("libx265"); + // setting target format no conversion + tester.setCVBridgeTargetFormat(orig_enc); + // encoder will automatically pick yuv420p, + // then recognize that this requires single-channel to + // color conversion, and do this losslessly. + // tester.setAVSourcePixelFormat("yuv420p"); // not needed + tester.addAVOption("x265-params", "lossless=1"); + tester.setExpectedPacketEncoding("hevc;yuv420p;" + orig_enc + ";" + orig_enc); + // --- decoding options + tester.setFinalROSEncoding(orig_enc); + tester.setDecoder("hevc"); + tester.setDecoderExpectedAVPixFmt("yuv420p"); + tester.runTest(); + tester.check(); +} + +int main(int argc, char ** argv) +{ + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/test/encoder_decoder_tester.cpp b/test/encoder_decoder_tester.cpp new file mode 100644 index 0000000..e6964a1 --- /dev/null +++ b/test/encoder_decoder_tester.cpp @@ -0,0 +1,176 @@ +// -*-c++-*--------------------------------------------------------------------------------------- +// Copyright 2025 Bernd Pfrommer +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "encoder_decoder_tester.hpp" + +#include + +#include + +using namespace std::placeholders; + +sensor_msgs::msg::Image::SharedPtr EncoderDecoderTester::makeTestMessage() +{ + auto img = std::make_shared(); + img->encoding = original_ros_encoding_; + img->width = width_; + img->height = height_; + img->step = (sensor_msgs::image_encodings::bitDepth(img->encoding) / 8) * img->width * + sensor_msgs::image_encodings::numChannels(img->encoding); + img->header.frame_id = frame_id_; + img->is_bigendian = false; + return (img); +} + +int byte_depth(const std::string & enc) +{ + return ( + (sensor_msgs::image_encodings::bitDepth(enc) * sensor_msgs::image_encodings::numChannels(enc)) / + 8); +} + +void EncoderDecoderTester::runTest() +{ + enc_.setQMax(10); + enc_.setBitRate(8242880); + enc_.setGOPSize(2); + enc_.setFrameRate(100, 1); + + if (!enc_.initialize( + width_, height_, + std::bind(&EncoderDecoderTester::packetReady, this, _1, _2, _3, _4, _5, _6, _7, _8, _9), + original_ros_encoding_)) { + std::cerr << "failed to initialize encoder!" << std::endl; + return; + } + std::cout << "original encoding " << original_ros_encoding_ << " has " + << byte_depth(original_ros_encoding_) << " bytes/pixel" << std::endl; + + for (size_t i = 0; i < num_frames_; i++) { + auto img = makeTestMessage(); + img->header.stamp = rclcpp::Time(i + 1, RCL_SYSTEM_TIME); + img->data.resize(img->step * img->height, static_cast(i)); + addImage(img); + enc_.encodeImage(*img); + } + enc_.flush(); + dec_.flush(); +} + +void EncoderDecoderTester::check() +{ + const size_t n = num_frames_; + EXPECT_EQ(getTsSum(), (n * (n + 1)) / 2); + EXPECT_EQ(getPtsSum(), (n * (n - 1)) / 2); + EXPECT_EQ(getPacketCounter(), n); + EXPECT_EQ(getFrameCounter(), n); +} + +void EncoderDecoderTester::packetReady( + const std::string & frame_id, const rclcpp::Time & stamp, const std::string & codec, + uint32_t width, uint32_t height, uint64_t pts, uint8_t flags, uint8_t * data, size_t sz) +{ + pts_sum_ += pts; + ffmpeg_image_transport_msgs::msg::FFMPEGPacket msg; + msg.header.frame_id = frame_id; + msg.header.stamp = stamp; + msg.encoding = codec; + msg.flags = flags; + msg.pts = pts; + msg.width = width; + msg.height = height; + msg.data.reserve(sz); + std::copy(data, data + sz, std::back_inserter(msg.data)); + decodePacket(msg); +} + +void EncoderDecoderTester::addImage(const Image::ConstSharedPtr & img) +{ + time_to_image_.insert({rclcpp::Time(img->header.stamp).nanoseconds(), img}); +} + +void EncoderDecoderTester::decodePacket(const FFMPEGPacket & msg) +{ + EXPECT_EQ(msg.header.frame_id, frame_id_); + EXPECT_EQ(msg.encoding, expected_packet_encoding_); + EXPECT_EQ(msg.width, width_); + EXPECT_EQ(msg.height, height_); + + if (!dec_.isInitialized()) { + dec_.initialize( + msg.encoding, std::bind(&EncoderDecoderTester::imageCallback, this, _1, _2, _3), + decoder_name_); + } + if (!dec_.decodePacket( + msg.encoding, &msg.data[0], msg.data.size(), msg.pts, msg.header.frame_id, + msg.header.stamp)) { + std::cerr << "error decoding packet!" << std::endl; + throw(std::runtime_error("error decoding packet!")); + } + packet_counter_++; +} + +void EncoderDecoderTester::imageCallback( + const Image::ConstSharedPtr & img, bool /* isKeyFrame */, const std::string & av_pix_fmt) +{ + const auto stamp = rclcpp::Time(img->header.stamp).nanoseconds(); + ts_sum_ += stamp; + const auto it = time_to_image_.find(stamp); + if (it == time_to_image_.end()) { + std::cerr << "cannot find image from time stamp " << stamp << std::endl; + throw(std::runtime_error("image time stamp not found")); + } + const auto & orig = it->second; + EXPECT_EQ(av_pix_fmt, decoder_av_pix_fmt_); + EXPECT_EQ(img->header.frame_id, orig->header.frame_id); + EXPECT_EQ(img->header.stamp, orig->header.stamp); + EXPECT_EQ(img->encoding, orig->encoding); + EXPECT_EQ(img->is_bigendian, orig->is_bigendian); + EXPECT_EQ(img->width, orig->width); + EXPECT_EQ(img->height, orig->height); + EXPECT_EQ(img->step, orig->step); + EXPECT_EQ(img->data.size(), orig->data.size()); + EXPECT_EQ(img->step, orig->step); + EXPECT_EQ(img->step * img->height, orig->data.size()); + EXPECT_GE(img->data.size(), 1); + const int num_channels = sensor_msgs::image_encodings::numChannels(img->encoding); + const int num_orig_channels = sensor_msgs::image_encodings::numChannels(orig->encoding); + if (num_channels != num_orig_channels) { + std::cerr << "num channel mismatch! orig: " << num_orig_channels << " vs now: " << num_channels + << std::endl; + throw(std::runtime_error("num channel mismatch!")); + } + bool data_equal{true}; + int last_err = 0; + for (size_t row = 0; row < img->height; row++) { + for (size_t col = 0; col < img->width; col++) { + for (int channel = 0; channel < num_channels; channel++) { + const uint8_t * p_orig = &(orig->data[row * orig->step + col * num_channels + channel]); + const uint8_t * p = &(img->data[row * orig->step + col * num_channels + channel]); + int err = static_cast(*p) - static_cast(*p_orig); + if (err && (err != last_err || col == 0 || col == img->width - 1)) { + std::cout << "mismatch image # " << rclcpp::Time(img->header.stamp).nanoseconds() + << " at line " << row << " col: " << col << " chan: " << channel + << ", orig: " << static_cast(*p_orig) << " now: " << static_cast(*p) + << std::endl; + data_equal = false; + last_err = err; + } + } + } + } + EXPECT_TRUE(data_equal); + frame_counter_++; +} diff --git a/test/encoder_decoder_tester.hpp b/test/encoder_decoder_tester.hpp new file mode 100644 index 0000000..0a1e7da --- /dev/null +++ b/test/encoder_decoder_tester.hpp @@ -0,0 +1,93 @@ +// -*-c++-*--------------------------------------------------------------------------------------- +// Copyright 2025 Bernd Pfrommer +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ENCODER_DECODER_TESTER_HPP_ +#define ENCODER_DECODER_TESTER_HPP_ +#include +#include +#include +#include +#include + +class EncoderDecoderTester +{ +public: + using Image = sensor_msgs::msg::Image; + using EncoderCallback = ffmpeg_encoder_decoder::Encoder::Callback; + using FFMPEGPacket = ffmpeg_image_transport_msgs::msg::FFMPEGPacket; + explicit EncoderDecoderTester(size_t n = 10, uint32_t width = 640, uint32_t height = 480) + : num_frames_(n), width_(width), height_(height) + { + } + // ------- encoder side options + void setNumFrames(int n) { num_frames_ = n; } + void setOriginalROSEncoding(const std::string & s) { original_ros_encoding_ = s; } + void setFinalROSEncoding(const std::string & s) { final_ros_encoding_ = s; } + void setWidth(uint32_t n) { width_ = n; } + void setHeight(uint32_t n) { height_ = n; } + void setEncoder(const std::string & n) { enc_.setEncoder(n); } + void setAVSourcePixelFormat(const std::string & f) { enc_.setAVSourcePixelFormat(f); } + void setCVBridgeTargetFormat(const std::string & f) { enc_.setCVBridgeTargetFormat(f); } + void addAVOption(const std::string & k, const std::string & v) { enc_.addAVOption(k, v); } + void setExpectedPacketEncoding(const std::string & s) { expected_packet_encoding_ = s; } + // ------- decoder side options + void setDecoder(const std::string & s) { decoder_name_ = s; } + void setDecoderExpectedAVPixFmt(const std::string & s) { decoder_av_pix_fmt_ = s; } + + // ------- getters + const auto & getImageSum() const { return (image_sum_); } + const auto & getTsSum() const { return (ts_sum_); } + const auto & getFrameCounter() const { return (frame_counter_); } + const auto & getPacketCounter() const { return (packet_counter_); } + const auto & getPtsSum() const { return (pts_sum_); } + + void runTest(); + void check(); + +private: + void packetReady( + const std::string & frame_id, const rclcpp::Time & stamp, const std::string & codec, + uint32_t width, uint32_t height, uint64_t pts, uint8_t flags, uint8_t * data, size_t sz); + void addImage(const Image::ConstSharedPtr & img); + void decodePacket(const FFMPEGPacket & msg); + void imageCallback( + const Image::ConstSharedPtr & img, bool /* isKeyFrame */, const std::string & av_pix_fmt); + sensor_msgs::msg::Image::SharedPtr makeTestMessage(); + // ----------------- variables + // --- encoder related + ffmpeg_encoder_decoder::Encoder enc_; + size_t num_frames_{10}; + std::string frame_id_{"frame_id"}; + std::string original_ros_encoding_; + std::string encoder_name_; + uint32_t width_{0}; + uint32_t height_{0}; + // --- decoder related + ffmpeg_encoder_decoder::Decoder dec_; + std::string decoder_name_; + std::string final_ros_encoding_; + std::string decoder_av_pix_fmt_; + std::string expected_packet_encoding_; + // --- statistics + uint64_t image_sum_{0}; + int64_t ts_sum_{0}; + uint64_t frame_counter_{0}; + uint64_t packet_counter_{0}; + uint64_t pts_sum_{0}; + // --- misc + std::unordered_map time_to_image_; +}; + +#endif // ENCODER_DECODER_TESTER_HPP_ diff --git a/test/encoder_test.cpp b/test/encoder_test.cpp deleted file mode 100644 index a9cb44e..0000000 --- a/test/encoder_test.cpp +++ /dev/null @@ -1,85 +0,0 @@ -// -*-c++-*--------------------------------------------------------------------------------------- -// Copyright 2024 Bernd Pfrommer -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include -#include -#include - -static const char * g_codec = "libx264"; -static const char * g_frame_id = "frame_id"; -static const int g_width = 1920; // must be mult of 64 for some codecs! -static const int g_height = 1080; - -static int g_frame_counter(0); - -void packetReady( - const std::string & frame_id, const rclcpp::Time & stamp, const std::string & codec, - uint32_t width, uint32_t height, uint64_t pts, uint8_t flags, uint8_t * data, size_t sz) -{ - (void)stamp; - (void)pts; - (void)flags; - (void)data; - (void)sz; - EXPECT_EQ(static_cast(width), g_width); - EXPECT_EQ(static_cast(height), g_height); - EXPECT_EQ(frame_id, g_frame_id); - EXPECT_EQ(codec, g_codec); - g_frame_counter++; -} - -void test_encoder(int numFrames, const std::string & codec) -{ - ffmpeg_encoder_decoder::Encoder enc; - enc.setEncoder(codec); - enc.setProfile("main"); - enc.setPreset("slow"); - enc.setQMax(10); - enc.setBitRate(8242880); - enc.setGOPSize(2); - enc.setFrameRate(100, 1); - - if (!enc.initialize(g_width, g_height, packetReady)) { - std::cerr << "failed to initialize encoder!" << std::endl; - return; - } - std_msgs::msg::Header header; - header.frame_id = g_frame_id; - for (int i = 0; i < numFrames; i++) { - cv::Mat mat = cv::Mat::zeros(g_height, g_width, CV_8UC3); // clear image - cv::putText( - mat, std::to_string(i), cv::Point(mat.cols / 2, mat.rows / 2), cv::FONT_HERSHEY_COMPLEX, - 2 /* font size */, cv::Scalar(255, 0, 0) /* col */, 2 /* weight */); - const rclcpp::Time t = rclcpp::Clock().now(); - header.stamp = t; - enc.encodeImage(mat, header, t); - } - enc.flush(header); -} - -TEST(ffmpeg_encoder_decoder, encoder) -{ - test_encoder(10, g_codec); - EXPECT_EQ(g_frame_counter, 10); -} - -int main(int argc, char ** argv) -{ - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -}