diff --git a/.github/actions/git-safe-directory/action.yaml b/.github/actions/git-safe-directory/action.yaml new file mode 100644 index 0000000..9105940 --- /dev/null +++ b/.github/actions/git-safe-directory/action.yaml @@ -0,0 +1,8 @@ +name: Git safe directory +description: For running act with checkout owned by non-root user, e.g. docker +runs: + using: "composite" + steps: + - name: Set git safe.directory to "*" + shell: bash + run: git config --system --add safe.directory '*' \ No newline at end of file diff --git a/.github/actions/setup-size-reports/action.yaml b/.github/actions/setup-size-reports/action.yaml new file mode 100644 index 0000000..277af34 --- /dev/null +++ b/.github/actions/setup-size-reports/action.yaml @@ -0,0 +1,17 @@ +name: Setup size reports +description: Setup size reports +inputs: + gh-context: + description: "GH Context" + required: true + +runs: + using: "composite" + steps: + - run: pip install numpy pandas humanfriendly pyelftools cxxfilt tabulate fastcore ghapi + shell: bash + - name: Set up environment for size reports + shell: bash + env: + GH_CONTEXT: ${{ inputs.gh-context }} + run: python .github/scripts/memory/gh_sizes_environment.py "${GH_CONTEXT}" diff --git a/.github/actions/upload-size-reports/action.yaml b/.github/actions/upload-size-reports/action.yaml new file mode 100644 index 0000000..1a37fb2 --- /dev/null +++ b/.github/actions/upload-size-reports/action.yaml @@ -0,0 +1,17 @@ +name: upload-size-reports +description: upload-size-reports +inputs: + platform-name: + description: "Platform name Name" + required: true + +runs: + using: "composite" + steps: + - name: Uploading Size Reports + uses: actions/upload-artifact@v4 + if: ${{ !env.ACT }} + with: + name: Size,${{ inputs.platform-name }}-Examples,${{ env.GH_EVENT_PR }},${{ env.GH_EVENT_HASH }},${{ env.GH_EVENT_PARENT }},${{ github.event_name }} + path: | + /tmp/bloat_reports/ diff --git a/.github/scripts/memory/README.md b/.github/scripts/memory/README.md new file mode 100644 index 0000000..61706ea --- /dev/null +++ b/.github/scripts/memory/README.md @@ -0,0 +1 @@ +Scripts copied from https://github.com/project-chip/connectedhomeip/tree/master/scripts/tools/memory \ No newline at end of file diff --git a/.github/scripts/memory/gh_report.py b/.github/scripts/memory/gh_report.py new file mode 100644 index 0000000..715a59f --- /dev/null +++ b/.github/scripts/memory/gh_report.py @@ -0,0 +1,409 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Generate reports from size artifacts.""" + +import io +import logging +import re +import sys +import traceback +from typing import Dict + +import fastcore # type: ignore +import memdf.report +import memdf.sizedb +import memdf.util.config +import memdf.util.markdown +import memdf.util.sqlite +import pandas as pd # type: ignore +from memdf import Config, ConfigDescription +from memdf.util.github import Gh + +DB_CONFIG: ConfigDescription = { + Config.group_def('database'): { + 'title': 'database options', + }, + 'database.readonly': { + 'help': 'Open database read only', + 'default': False, + 'argparse': { + 'alias': ['--db-readonly'], + }, + }, +} + +GITHUB_CONFIG: ConfigDescription = { + Config.group_def('github'): { + 'title': 'github options', + }, + 'github.comment': { + 'help': 'Send output as github PR comments', + 'default': False, + 'argparse': { + 'alias': ['--comment'], + }, + }, + 'github.limit-comments': { + 'help': 'Send no more than COUNT comments', + 'metavar': 'COUNT', + 'default': 0, + 'argparse': { + 'type': int, + }, + }, + 'github.limit-artifacts': { + 'help': 'Download no more than COUNT artifacts', + 'metavar': 'COUNT', + 'default': 0, + 'argparse': { + 'type': int, + }, + }, + 'github.limit-pr': { + 'help': 'Report only on PR, if present.', + 'metavar': 'PR', + 'default': 0, + 'argparse': { + 'type': int, + }, + }, +} + +REPORT_CONFIG: ConfigDescription = { + Config.group_map('report'): { + 'group': 'output' + }, + 'report.pr': { + 'help': 'Report on pull requests', + 'default': False, + 'argparse': { + 'alias': ['--pr', '--pull-request'], + }, + }, + 'report.push': { + 'help': 'Report on pushes', + 'default': False, + 'argparse': { + 'alias': ['--push'] + }, + }, + 'report.increases': { + 'help': 'Highlight large increases', + 'metavar': 'PERCENT', + 'default': 0.0, + 'argparse': { + 'alias': ['--threshold'], + 'type': float, + }, + }, +} + + +class SizeContext: + """Generate reports from size artifacts.""" + + comment_format_re = re.compile(r"^") + + def __init__(self, config: Config): + self.config = config + self.gh = Gh(config) + db_file = config.get('database.file', ':memory:') + self.db = memdf.sizedb.SizeDatabase(db_file, + not config['database.readonly']) + self.db.open() + + def add_sizes_from_github(self): + """Read size report artifacts from github.""" + + # Record size artifacts keyed by group and commit_hash to match them up + # after we have the entire list. + size_artifacts: Dict[str, Dict[str, fastcore.basics.AttrDict]] = {} + for a in self.gh.get_size_artifacts(): + if a.group not in size_artifacts: + size_artifacts[a.group] = {} + size_artifacts[a.group][a.commit] = a + logging.debug('ASG: artifact %d %s', a.id, a.name) + + # Determine required size artifacts. + artifact_limit = self.config['github.limit-artifacts'] + required_artifact_ids: set[int] = set() + for group, group_reports in size_artifacts.items(): + logging.debug('ASG: group %s', group) + for report in group_reports.values(): + if self.should_report(report.event): + if report.parent not in group_reports: + logging.debug('ASN: No match for %s', report.name) + continue + if (artifact_limit + and len(required_artifact_ids) >= artifact_limit): + continue + # We have size information for both this report and its + # parent, so ensure that both artifacts are downloaded. 
+ parent = group_reports[report.parent] + required_artifact_ids.add(report.id) + required_artifact_ids.add(parent.id) + logging.debug('ASM: Match %s', report.parent) + logging.debug('ASR: %s %s', report.id, report.name) + logging.debug('ASP: %s %s', parent.id, parent.name) + + # Download and add required artifacts. + for i in required_artifact_ids: + blob = self.gh.download_artifact(i) + if blob: + try: + self.db.add_sizes_from_zipfile(io.BytesIO(blob), + {'artifact': i}) + except Exception: + # Report in case the zipfile is invalid, however do not fail + # all the rest (behave as if artifact download has failed) + traceback.print_exc() + + def read_inputs(self): + """Read size report from github and/or local files.""" + if self.gh: + self.add_sizes_from_github() + for filename in self.config['args.inputs']: + self.db.add_sizes_from_file(filename) + self.db.commit() + return self + + def should_report(self, event: str = '') -> bool: + """Return true if reporting is enabled for the action event.""" + if not event: + return self.config['report.pr'] or self.config['report.push'] + if event == 'pull_request': + return self.config['report.pr'] + return self.config['report.push'] + + def get_existing_comment(self, pr: int, title: str): + """Check for an existing comment.""" + existing_comment = None + existing_comment_format = 0 + for comment in self.gh.get_comments_for_pr(pr): + comment_parts = comment.body.partition('\n') + if comment_parts[0].strip() == title: + existing_comment = comment + if m := self.comment_format_re.match(comment_parts[2]): + existing_comment_format = int(m.group(1)) + break + return (existing_comment, existing_comment_format) + + def get_newest_commit(self, pr: int) -> str: + """Get the hash of the most recent commit on the PR.""" + commits = sorted( + self.gh.get_commits_for_pr(pr), + key=lambda c: f'{c.commit.committer.date}{c.commit.author.date}', + reverse=True) + return commits[0].sha if commits else '' + + def post_change_report(self, df: pd.DataFrame) -> bool: + """Send a change report as a github comment.""" + if not self.gh: + return False + pr = df.attrs['pr'] + + # Check for an existing size report comment. If one exists, we'll add + # the new information to it. + existing_comment, existing_comment_format = self.get_existing_comment( + pr, df.attrs['title']) + + if not existing_comment: + # Check the most recent commit on the PR, so that we don't comment + # for commits that are already outdated. + commit = df.attrs['commit'] + latest = self.get_newest_commit(pr) + if commit != latest: + logging.info( + 'SCS: PR #%s: not commenting for stale %s; newest is %s', + pr, commit, latest) + # Return True so that the obsolete artifacts get removed. 
+ return True + + if existing_comment_format == 1: + df = V1Comment.merge(df, existing_comment) + else: + existing_comment = None + text = V1Comment.format(self.config, df) + + if existing_comment: + return self.gh.update_comment(existing_comment.id, text) + return self.gh.create_comment(pr, text) + + def report_matching_commits(self) -> Dict[str, pd.DataFrame]: + """Report on all new comparable commits.""" + if not self.should_report(): + return {} + + comment_count = 0 + comment_limit = self.config['github.limit-comments'] + comment_enabled = (self.config['github.comment'] + or self.config['github.dryrun-comment']) + + only_pr = self.config['github.limit-pr'] + + dfs = {} + commits = self.db.select_matching_commits() + for event, pr, commit, parent in commits.fetchall(): + if not self.should_report(event): + continue + + # Github doesn't have a way to fetch artifacts associated with a + # particular PR. For testing purposes, filter to a single PR here. + if only_pr and pr != only_pr: + continue + + changes = self.db.select_changes(parent, commit) + + self.db.delete_builds(changes.stale_builds) + self.gh.delete_artifacts(changes.stale_artifacts) + + if not changes.rows: + # Matching commits had no new matching builds. + continue + + df = pd.DataFrame(changes.rows, columns=changes.columns) + + # Filter down to region reports only. + df = df[df['kind'] == 'region'].drop('kind', axis=1) + + df.attrs = { + 'name': f'{pr},{parent},{commit}', + 'title': (f'PR #{pr}: ' if pr else '') + + f'Size comparison from {parent} to {commit}', + 'things': changes.things, + 'builds': changes.builds, + 'artifacts': changes.artifacts, + 'pr': pr, + 'commit': commit, + 'parent': parent, + } + dfs[df.attrs['name']] = df + + if (event == 'pull_request' and comment_enabled + and (comment_limit == 0 or comment_limit > comment_count)): + if self.post_change_report(df): + # Mark the originating builds, and remove the originating + # artifacts, so that they don't generate duplicate report + # comments. + self.db.set_commented(df.attrs['builds']) + self.gh.delete_artifacts(df.attrs['artifacts']) + comment_count += 1 + return dfs + + +class V1Comment: + """Format of a GitHub comment.""" + + @staticmethod + def format(config: Config, df: pd.DataFrame): + """Format a GitHub comment.""" + + threshold_df = None + increase_df = df[df['change'] > 0] + if increase_df.empty: + increase_df = None + elif threshold := config['report.increases']: + threshold_df = df[df['% change'] > threshold] + if threshold_df.empty: + threshold_df = None + + with io.StringIO() as md: + md.write(df.attrs['title']) + md.write('\n\n\n') + + if threshold_df is not None: + md.write(f'**Increases above {threshold:.2g}%:**\n\n') + md.write('\n\n') + V1Comment.write_df(config, threshold_df, md) + + summary = V1Comment.summary(df) + md.write('
<details>\n') + md.write(f'<summary>Full report ({summary})</summary>\n') + md.write('\n\n') + V1Comment.write_df(config, df, md) + md.write('\n</details>
\n') + + return md.getvalue() + + @staticmethod + def summary(df: pd.DataFrame) -> str: + count = df[['platform', 'target', 'config']].drop_duplicates().shape[0] + platforms = ', '.join(sorted(list(set(df['platform'])))) + return f'{count} build{"" if count == 1 else "s"} for {platforms}' + + @staticmethod + def write_df(config: Config, df: pd.DataFrame, + out: memdf.report.OutputOption): + memdf.report.write_df(config, + df, + out, + 'pipe', + hierify=True, + title=False, + floatfmt='5.1f') + + @staticmethod + def merge(df: pd.DataFrame, comment) -> pd.DataFrame: + """Merge an existing comment into the DataFrame.""" + with io.StringIO(comment.body) as body: + for line in body: + if line.startswith(''): + body.readline() # Blank line before table. + cols, rows = memdf.util.markdown.read_hierified(body) + break + logging.debug('REC: read %d rows', len(rows)) + attrs = df.attrs + df = pd.concat([df, pd.DataFrame(data=rows, columns=cols).astype(df.dtypes)], + ignore_index=True) + df.attrs = attrs + return df.sort_values( + by=['platform', 'target', 'config', 'section']).drop_duplicates() + + +def main(argv): + status = 0 + try: + config = Config().init({ + **memdf.util.config.CONFIG, + **memdf.util.github.CONFIG, + **memdf.util.sqlite.CONFIG, + **memdf.report.OUTPUT_CONFIG, + **GITHUB_CONFIG, + **DB_CONFIG, + **REPORT_CONFIG, + }) + config.argparse.add_argument('inputs', metavar='FILE', nargs='*') + config.parse(argv) + + szc = SizeContext(config) + szc.read_inputs() + dfs = szc.report_matching_commits() + + memdf.report.write_dfs(config, + dfs, + hierify=True, + title=True, + floatfmt='5.1f') + + except Exception as exception: + raise exception + + return status + + +if __name__ == '__main__': + sys.exit(main(sys.argv)) \ No newline at end of file diff --git a/.github/scripts/memory/gh_sizes.py b/.github/scripts/memory/gh_sizes.py new file mode 100644 index 0000000..3c2aee0 --- /dev/null +++ b/.github/scripts/memory/gh_sizes.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +""" +This is similar to scripts/tools/memory/report_summary.py, but generates +a specific output format with a simplified interface for use in github +workflows. + +Usage: gh_sizes.py ‹platform› ‹config› ‹target› ‹binary› [‹output›] [‹option›…] + ‹platform› - Platform name, corresponding to a config file + in scripts/tools/memory/platform/ + ‹config› - Configuration identification string. + ‹target› - Build artifact identification string. + ‹binary› - Binary build artifact. + ‹output› - Output name or directory. + ‹option›… - Other options as for report_summary. + +This script also expects certain environment variables, which can be set in a +github workflow as follows: + + - name: Set up environment for size reports + if: ${{ !env.ACT }} + env: + GH_CONTEXT: ${{ toJson(github) }} + run: gh_sizes_environment.py "${GH_CONTEXT}" + +Default output file is {platform}-{configname}-{buildname}-sizes.json in the +binary's directory. 
This file has the form: + + { + "platform": "‹platform›", + "config": "‹config›", + "target": "‹target›", + "time": 1317645296, + "input": "‹binary›", + "event": "pull_request", + "hash": "496620796f752063616e20726561642074686973", + "parent": "20796f752061726520746f6f20636c6f73652e0a", + "pr": 12345, + "by": "section", + "ref": "refs/pull/12345/merge" + "frames": { + "section": [ + {"section": ".bss", "size": 260496}, + {"section": ".data", "size": 1648}, + {"section": ".text", "size": 740236} + ], + "region": [ + {"region": "FLASH", "size": 262144}, + {"region": "RAM", "size": 74023} + ] + } + } + +""" + +import datetime +import logging +import os +import pathlib +import sys + +import memdf.collect +import memdf.report +import memdf.select +import memdf.util +from memdf import Config, ConfigDescription, DFs, SectionDF + +PLATFORM_CONFIG_DIR = pathlib.Path('scripts/tools/memory/platform') + +CONFIG: ConfigDescription = { + 'event': { + 'help': 'Github workflow event name', + 'metavar': 'NAME', + 'default': os.environ.get('GITHUB_EVENT_NAME'), + }, + 'pr': { + 'help': 'Github PR number', + 'metavar': 'NUMBER', + 'default': int(os.environ.get('GH_EVENT_PR', '0')), + }, + 'hash': { + 'help': 'Current commit hash', + 'metavar': 'HASH', + 'default': os.environ.get('GH_EVENT_HASH'), + }, + 'parent': { + 'help': 'Parent commit hash', + 'metavar': 'HASH', + 'default': os.environ.get('GH_EVENT_PARENT'), + }, + 'ref': { + 'help': 'Target ref', + 'metavar': 'REF', + 'default': os.environ.get('GH_EVENT_REF'), + }, + 'timestamp': { + 'help': 'Build timestamp', + 'metavar': 'TIME', + 'default': int(float( + os.environ.get('GH_EVENT_TIMESTAMP') + or datetime.datetime.now().timestamp())), + }, +} + + +def main(argv): + status = 0 + + try: + _, platform, config_name, target_name, binary, *args = argv + except ValueError: + program = pathlib.Path(argv[0]) + logging.error( + """ + Usage: %s platform config target binary [output] [options] + + This is intended for use in github workflows. + For other purposes, a general program for the same operations is + %s/report_summary.py + + """, program.name, program.parent) + return 1 + + try: + config_file = pathlib.Path(platform) + if config_file.is_file(): + platform = config_file.stem + else: + config_file = (PLATFORM_CONFIG_DIR / platform).with_suffix('.cfg') + + output_base = f'{platform}-{config_name}-{target_name}-sizes.json' + if args and not args[0].startswith('-'): + out, *args = args + output = pathlib.Path(out) + if out.endswith('/') and not output.exists(): + output.mkdir(parents=True) + if output.is_dir(): + output = output / output_base + else: + output = pathlib.Path(binary).parent / output_base + + config_desc = { + **memdf.util.config.CONFIG, + **memdf.collect.CONFIG, + **memdf.select.CONFIG, + **memdf.report.OUTPUT_CONFIG, + **CONFIG, + } + # In case there is no platform configuration file, default to using a popular set of section names. 
+ config_desc['section.select']['default'] = [ + '.text', '.rodata', '.data', '.bss'] + + config = Config().init(config_desc) + config.put('output.file', output) + config.put('output.format', 'json_records') + if config_file.is_file(): + config.read_config_file(config_file) + else: + logging.warning('Missing config file: %s', config_file) + config.parse([argv[0]] + args) + + config.put('output.metadata.platform', platform) + config.put('output.metadata.config', config_name) + config.put('output.metadata.target', target_name) + config.put('output.metadata.time', config['timestamp']) + config.put('output.metadata.input', binary) + config.put('output.metadata.by', 'section') + for key in ['event', 'hash', 'parent', 'pr', 'ref']: + if value := config[key]: + config.putl(['output', 'metadata', key], value) + + # In case there is no platform configuration file or it does not define regions, + # try to find reasonable groups. + if not config.get('region.sections'): + sections = {'FLASH': [], 'RAM': []} + for section in config.get('section.select'): + print('section:', section) + for substring, region in [('text', 'FLASH'), ('rodata', 'FLASH'), ('data', 'RAM'), ('bss', 'RAM')]: + if substring in section: + sections[region].append(section) + break + config.put('region.sections', sections) + + collected: DFs = memdf.collect.collect_files(config, [binary]) + + sections = collected[SectionDF.name] + section_summary = sections[['section', + 'size']].sort_values(by='section') + section_summary.attrs['name'] = "section" + + region_summary = memdf.select.groupby( + config, collected['section'], 'region') + region_summary.attrs['name'] = "region" + + summaries = { + 'section': section_summary, + 'region': region_summary, + } + + # Write configured (json) report to the output file. + memdf.report.write_dfs(config, summaries) + + # Write text report to stdout. + memdf.report.write_dfs(config, + summaries, + sys.stdout, + 'simple', + floatfmt='.0f') + + except Exception as exception: + raise exception + + return status + + +if __name__ == '__main__': + sys.exit(main(sys.argv)) \ No newline at end of file diff --git a/.github/scripts/memory/gh_sizes_environment.py b/.github/scripts/memory/gh_sizes_environment.py new file mode 100644 index 0000000..94d828d --- /dev/null +++ b/.github/scripts/memory/gh_sizes_environment.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 + +""" +Set up environment variables used to generate size report artifacts. + +Takes a single argument, a JSON dictionary of the `github` context. +Typically run as: + +``` + - name: Set up environment for size reports + if: ${{ !env.ACT }} + env: + GH_CONTEXT: ${{ toJson(github) }} + run: gh_sizes_environment.py "${GH_CONTEXT}" +``` + +Sets the following environment variables: + +- `GH_EVENT_PR` For a pull request, the PR number; otherwise 0. +- `GH_EVENT_HASH` SHA of the commit under test. +- `GH_EVENT_PARENT` SHA of the parent commit to which the commit under + test is applied. +- `GH_EVENT_REF` The branch or tag ref that triggered the workflow run. +- `GH_EVENT_TIMESTAMP` For `push` events only, the timestamp of the commit. 
+""" + +import json +import os +import re +import subprocess +import sys + +import dateutil.parser + +github = json.loads(sys.argv[1]) + +commit = None +timestamp = None +ref = github['ref'] + +if github['event_name'] == 'pull_request': + + pr = github['event']['number'] + commit = github['event']['pull_request']['head']['sha'] + + # Try to find the actual commit against which the current PR compares + # by scraping the HEAD commit message. + r = subprocess.run(['git', 'show', '--no-patch', '--format=%s', 'HEAD'], + capture_output=True, text=True, check=True) + m = re.fullmatch('Merge [0-9a-f]+ into ([0-9a-f]+)', r.stdout) + if m: + parent = m.group(1) + else: + parent = github['event']['pull_request']['base']['sha'] + +elif github['event_name'] == 'push': + + commit = github['sha'] + parent = github['event']['before'] + timestamp = dateutil.parser.isoparse( + github['event']['head_commit']['timestamp']).timestamp() + + # Try to find the PR being committed by scraping the commit message. + m = re.search(r'\(#(\d+)\)', github['event']['head_commit']['message']) + if m: + pr = m.group(1) + else: + pr = 0 + +# Environment variables for subsequent workflow steps are set by +# writing to the file named by `$GITHUB_ENV`. + +if commit is not None: + env = os.environ.get('GITHUB_ENV') + assert env + with open(env, 'at') as out: + print(f'GH_EVENT_PR={pr}', file=out) + print(f'GH_EVENT_HASH={commit}', file=out) + print(f'GH_EVENT_PARENT={parent}', file=out) + print(f'GH_EVENT_REF={ref}', file=out) + if timestamp: + print(f'GH_EVENT_TIMESTAMP={timestamp}', file=out) \ No newline at end of file diff --git a/.github/scripts/memory/memdf/README.md b/.github/scripts/memory/memdf/README.md new file mode 100644 index 0000000..2ea3c2f --- /dev/null +++ b/.github/scripts/memory/memdf/README.md @@ -0,0 +1,29 @@ +--- +orphan: true +--- + +This package contains routines to to collect, aggregate, and report memory +usage, using Pandas `DataFrame` as the primary representation. + +- memdf.collect — Helpers to read memory information from various sources + (e.g. executables) according to command line options. +- memdf.df — `DataFrame` utilities, in particular definitions of columns and + types for the main uses of data frames. +- memdf.name — Names for synthetic symbols, etc. Individual readers are + located under memdf.collector. +- memdf.report — Helpers to write data frames in various formats according to + command line or configured options. +- memdf.select — Helpers to select relevant subsets of data frames according + to command line or configured options. +- memdf.sizedb — Helpers for a database of size information. + +Modules under memdf.util are not specifically tied to memory usage. + +- memdf.util.config — `Config` utility class for managing command line or + other options according to a declarative description. +- memdf.util.github — Utilities for communicating with GitHub. +- memdf.util.markdown — Utilities for manipulating Markdown text. +- memdf.util.nd — Nested dictionary utilities, used by `Config`. +- memdf.util.pretty — Pretty-printed logging utility functions. +- memdf.util.sqlite - Utilities for connecting to a sqlite3 database. +- memdf.util.subprocess — Utilities for executing external commands. 
diff --git a/.github/scripts/memory/memdf/__init__.py b/.github/scripts/memory/memdf/__init__.py new file mode 100644 index 0000000..0a523fb --- /dev/null +++ b/.github/scripts/memory/memdf/__init__.py @@ -0,0 +1,24 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Package for working with memory usage information using Pandas DataFrame.""" + +from memdf.df import DF, DFs, ExtentDF, SectionDF, SegmentDF, SymbolDF +from memdf.util.config import Config, ConfigDescription + +__all__ = [ + 'DF', 'SymbolDF', 'SectionDF', 'SegmentDF', 'ExtentDF', + 'DFs', 'Config', 'ConfigDescription', +] diff --git a/.github/scripts/memory/memdf/collect.py b/.github/scripts/memory/memdf/collect.py new file mode 100644 index 0000000..18cd004 --- /dev/null +++ b/.github/scripts/memory/memdf/collect.py @@ -0,0 +1,334 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Collect information from various sources into Memory Map DataFrames.""" + +import bisect +from typing import Callable, Dict, List, Mapping, Optional, Sequence, Tuple + +import memdf.collector.bloaty +import memdf.collector.csv +import memdf.collector.elftools +import memdf.collector.readelf +import memdf.collector.su +import memdf.name +import memdf.select +import memdf.util.config +import pandas as pd # type: ignore +from elftools.elf.constants import SH_FLAGS # type: ignore +from memdf import DF, Config, ConfigDescription, DFs, ExtentDF, SectionDF, SymbolDF +from memdf.collector.util import simplify_source + +PREFIX_CONFIG: ConfigDescription = { + 'collect.prefix': { + 'help': 'Strip PATH from the beginning of source file names', + 'metavar': 'PATH', + 'default': [], + 'argparse': { + 'alias': ['--prefix', '--strip-prefix'], + 'action': 'append', + } + }, +} + +CONFIG: ConfigDescription = { + Config.group_def('input'): { + 'title': 'input options', + }, + Config.group_def('tool'): { + 'title': 'external tool options', + }, + Config.group_map('collect'): { + 'group': 'input' + }, + **memdf.collector.bloaty.CONFIG, + **memdf.collector.csv.CONFIG, + **memdf.collector.elftools.CONFIG, + **memdf.collector.readelf.CONFIG, + 'collect.method': { + 'help': + 'Method of input processing: one of' + ' elftools, readelf, bloaty, csv, tsv, su.', + 'metavar': 'METHOD', + 'choices': ['elftools', 'readelf', 'bloaty', 'csv', 'tsv', 'su'], + 'default': 'elftools', + 'argparse': { + 'alias': ['-f'], + }, + }, + **PREFIX_CONFIG, +} + +ARM_SPECIAL_SYMBOLS = frozenset(["$a", "$t", "$t.x", "$d", "$d.realdata"]) + + +def postprocess_symbols(config: Config, symbols: SymbolDF) -> SymbolDF: + """Postprocess a symbol table after collecting from one source. + + If the symbol table contains FILE symbols, they will be removed and + replaced by a 'file' column on other symbols. + + If the symbol table contains ARM mode symbols, they will be removed + and replaced by an 'arm' column on other symbols. + """ + files = [] + arms = [] + arm_symbols = {} + current_file = '' + current_arm = '' + has_file = False + if config['collect.prefix-file']: + prefixes = config.get_re('collect.prefix') + else: + prefixes = None + if 'type' in symbols.columns: + for symbol in symbols.itertuples(): + if symbol.type == 'FILE': + has_file = True + current_file = symbol.symbol + if prefixes: + current_file = simplify_source(current_file, prefixes) + + elif symbol.type == 'NOTYPE': + if symbol.symbol.startswith('$'): + if current_arm or symbol.symbol in ARM_SPECIAL_SYMBOLS: + current_arm = symbol.symbol + arm_symbols[current_arm] = True + files.append(current_file) + arms.append(current_arm) + + if has_file: + symbols['file'] = files + if current_arm: + symbols['arm'] = arms + + if has_file: + symbols = symbols[symbols['type'] != 'FILE'] + if current_arm: + syms = arm_symbols.keys() + symbols = symbols[~symbols.symbol.isin(syms)] + return symbols + + +def postprocess_file(config: Config, dfs: DFs) -> None: + """Postprocess tables after collecting from one source.""" + if SymbolDF.name in dfs: + dfs[SymbolDF.name] = postprocess_symbols(config, dfs[SymbolDF.name]) + + +def fill_holes(config: Config, symbols: SymbolDF, sections: SectionDF) -> DFs: + """Account for space not used by any symbol, or by multiple symbols.""" + + # These symbols mark the start or end of unused space. 
+ start_unused = frozenset(config.get('symbol.free.start', [])) + end_unused = frozenset(config.get('symbol.free.end', [])) + + extent_columns = ['address', 'size', 'section', 'file'] + need_cu = 'cu' in symbols.columns + if need_cu: + extent_columns.append('cu') + need_input = 'input' in symbols.columns + if need_input: + extent_columns.append('input') + columns = ['symbol', *extent_columns, 'type', 'bind'] + + def filler(name, address, size, previous, current) -> List: + row = [ + name, # symbol + address, # address + size, # size + (previous.section if previous else + current.section if current else memdf.name.UNDEF), # section + (previous.file + if previous else current.file if current else ''), # file + ] + if need_cu: + row.append( + previous.cu if previous else current.cu if current else '') + if need_input: + row.append(previous.input if previous else current. + input if current else '') + row.append('NOTYPE') # type + row.append('LOCAL') # bind + return row + + def fill_gap(previous, current, from_address, + to_address) -> Tuple[str, List]: + """Add a row for a unaccounted gap or unused space.""" + size = to_address - from_address + if (previous is None or previous.symbol in start_unused + or current.symbol in end_unused): + use = 'unused' + name = memdf.name.unused(from_address, size) + else: + use = 'gap' + name = memdf.name.gap(from_address, size) + return (use, filler(name, from_address, size, previous, current)) + + def fill_overlap(previous, current, from_address, + to_address) -> Tuple[str, List]: + """Add a row for overlap.""" + size = to_address - from_address + return ('overlap', + filler(memdf.name.overlap(from_address, -size), from_address, + size, previous, current)) + + # Find the address range for sections that are configured or allocated. + config_sections = set() + for _, s in config.get('region.sections', {}).items(): + config_sections |= set(s) + section_to_range = {} + start_to_section = {} + section_starts = [0] + for s in sections.itertuples(): + if ((s.section in config_sections) or (s.flags & SH_FLAGS.SHF_ALLOC)): + section_to_range[s.section] = range(s.address, s.address + s.size) + start_to_section[s.address] = s.section + section_starts.append(s.address) + section_starts.sort() + + new_symbols: Dict[str, List[list]] = { + 'gap': [], + 'unused': [], + 'overlap': [] + } + section_range = None + previous_symbol = None + current_address = 0 + iterable_symbols = symbols.loc[(symbols.type != 'SECTION') + & (symbols.type != 'FILE') + & symbols.section.isin(section_to_range)] + iterable_symbols = iterable_symbols.sort_values(by='address') + + for symbol in iterable_symbols.itertuples(): + if not previous_symbol or symbol.section != previous_symbol.section: + # We sometimes see symbols that have the value of their section end + # address (so they are not actually within the section) and have + # the same address as a symbol in the next section. + symbol_address_section = start_to_section.get(section_starts[ + bisect.bisect_right(section_starts, symbol.address) - 1]) + if symbol_address_section != symbol.section: + continue + # Starting or switching sections. + if previous_symbol and section_range: + # previous_symbol is the last in its section. + if current_address < section_range[-1] + 1: + use, row = fill_gap(previous_symbol, previous_symbol, + current_address, section_range[-1] + 1) + new_symbols[use].append(row) + # Start of section. 
+ previous_symbol = None + section_range = section_to_range.get(symbol.section) + if section_range: + current_address = section_range[0] + if section_range: + if current_address < symbol.address: + use, row = fill_gap(previous_symbol, symbol, current_address, + symbol.address) + new_symbols[use].append(row) + elif current_address > symbol.address: + use, row = fill_overlap(previous_symbol, symbol, + current_address, symbol.address) + new_symbols[use].append(row) + current_address = symbol.address + symbol.size + previous_symbol = symbol + + dfs = {k: SymbolDF(new_symbols[k], columns=columns) for k in new_symbols} + symbols = pd.concat([symbols, *dfs.values()]).fillna('') + symbols.sort_values(by='address', inplace=True) + for k in dfs: + dfs[k] = ExtentDF(dfs[k][extent_columns]) + dfs[k].attrs['name'] = k + dfs[SymbolDF.name] = SymbolDF(symbols) + return dfs + + +def postprocess_collected(config: Config, dfs: DFs) -> None: + """Postprocess tables after reading all sources.""" + + # Prune tables according to configuration options. This happens before + # fill_holes() so that space of any pruned symbols will be accounted for, + # and to avoid unnecessary work for pruned sections. + for c in [SymbolDF, SectionDF]: + if c.name in dfs: + dfs[c.name] = memdf.select.select_configured( + config, dfs[c.name], memdf.select.COLLECTED_CHOICES) + + # Account for space not used by any symbol, or by multiple symbols. + if (SymbolDF.name in dfs and SectionDF.name in dfs + and config.get('args.fill_holes', True)): + dfs.update(fill_holes(config, dfs[SymbolDF.name], dfs[SectionDF.name])) + + # Create synthetic columns (e.g. 'region') and prune tables + # according to their configuration. This happens after fill_holes() + # so that synthetic column values will be created for the gap symbols. 
+ for c in [SymbolDF, SectionDF]: + if c.name in dfs: + for column in memdf.select.SYNTHETIC_CHOICES: + dfs[c.name] = memdf.select.synthesize_column( + config, dfs[c.name], column) + dfs[c.name] = memdf.select.select_configured_column( + config, dfs[c.name], column) + + for df in dfs.values(): + if demangle := set((c for c in df.columns if c.endswith('symbol'))): + df.attrs['demangle'] = demangle + if hexify := set((c for c in df.columns if c.endswith('address'))): + df.attrs['hexify'] = hexify + + +FileReader = Callable[[Config, str, str], DFs] + +FILE_READERS: Dict[str, FileReader] = { + 'bloaty': memdf.collector.bloaty.read_file, + 'elftools': memdf.collector.elftools.read_file, + 'readelf': memdf.collector.readelf.read_file, + 'csv': memdf.collector.csv.read_file, + 'tsv': memdf.collector.csv.read_file, + 'su': memdf.collector.su.read_dir, +} + + +def collect_files(config: Config, + files: Optional[List[str]] = None, + method: Optional[str] = None) -> DFs: + """Read a filtered memory map from a set of files.""" + filenames = files if files else config.get('args.inputs', []) + if method is None: + method = config.get('collect.method', 'csv') + frames: Dict[str, List[DF]] = {} + for filename in filenames: + dfs: DFs = FILE_READERS[method](config, filename, method) + postprocess_file(config, dfs) + for k, frame in dfs.items(): + if k not in frames: + frames[k] = [] + frames[k].append(frame) + dfs = {} + for k, v in frames.items(): + dfs[k] = pd.concat(v, ignore_index=True) + postprocess_collected(config, dfs) + return dfs + + +def parse_args(config_desc: Mapping, argv: Sequence[str]) -> Config: + """Common argument parsing for collection tools.""" + config = Config().init({ + **memdf.util.config.CONFIG, + **CONFIG, + **config_desc + }) + config.argparse.add_argument('inputs', metavar='FILE', nargs='+') + return config.parse(argv) diff --git a/.github/scripts/memory/memdf/collector/__init__.py b/.github/scripts/memory/memdf/collector/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.github/scripts/memory/memdf/collector/bloaty.py b/.github/scripts/memory/memdf/collector/bloaty.py new file mode 100644 index 0000000..d3caeb3 --- /dev/null +++ b/.github/scripts/memory/memdf/collector/bloaty.py @@ -0,0 +1,61 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Collect memory information from bloaty.""" + +import io +import os + +import memdf.util.subprocess +import pandas as pd # type: ignore +from memdf import Config, ConfigDescription, DFs, SymbolDF +from memdf.collector.util import simplify_source + +CONFIG: ConfigDescription = { + 'tool.bloaty': { + 'help': 'File name of the bloaty executable', + 'metavar': 'FILE', + 'default': 'bloaty', + }, +} + + +def read_symbols(config: Config, filename: str) -> SymbolDF: + """Read a binary's symbol map using bloaty.""" + column_map = { + 'compileunits': 'cu', + 'sections': 'section', + 'symbols': 'symbol', + 'vmsize': 'size', + } + process = memdf.util.subprocess.run_tool_pipe(config, [ + 'bloaty', '--tsv', '--demangle=none', '-n', '0', '-d', + 'compileunits,sections,symbols', filename + ]) + if not process or not process.stdout: + return SymbolDF() + df = pd.read_table(io.TextIOWrapper(process.stdout, newline=os.linesep), + usecols=list(column_map.keys()), + dtype=SymbolDF.dtype, + na_filter=False) + df.rename(inplace=True, columns=column_map) + prefixes = config.get_re('collect.prefix') + df['cu'] = df['cu'].apply(lambda s: simplify_source(s, prefixes)) + return df + + +def read_file(config: Config, filename: str, method: str = None) -> DFs: + """Read a binary's memory map using bloaty.""" + return {SymbolDF.name: read_symbols(config, filename)} diff --git a/.github/scripts/memory/memdf/collector/csv.py b/.github/scripts/memory/memdf/collector/csv.py new file mode 100644 index 0000000..9d2fe00 --- /dev/null +++ b/.github/scripts/memory/memdf/collector/csv.py @@ -0,0 +1,35 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Collect memory information from csv files.""" + +import memdf.name +import pandas as pd # type: ignore +from memdf.df import DFs +from memdf.util.config import Config, ConfigDescription + +CONFIG: ConfigDescription = {} + + +def read_file(config: Config, filename: str, method: str = ',') -> DFs: + """Read a csv or tsv file into a data frame.""" + delimiter = {'csv': ',', 'tsv': '\t'}.get(method, method) + df = pd.read_csv(open(filename, 'r'), sep=delimiter, na_filter=False) + if df_class := memdf.df.find_class(df): + df_name = df.name + df = df.astype(df_class.dtype) + else: + df_name = memdf.name.UNKNOWN + return {df_name: df} diff --git a/.github/scripts/memory/memdf/collector/elftools.py b/.github/scripts/memory/memdf/collector/elftools.py new file mode 100644 index 0000000..8aac38f --- /dev/null +++ b/.github/scripts/memory/memdf/collector/elftools.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Collect memory information using elftools.""" + +import pathlib + +import elftools.elf.constants # type: ignore +import elftools.elf.descriptions # type: ignore +import elftools.elf.sections # type: ignore +import memdf.name +from elftools.elf.elffile import ELFFile # type: ignore +from memdf.collector.util import simplify_source +from memdf.df import DFs, SectionDF, SegmentDF, SymbolDF +from memdf.util.config import Config, ConfigDescription + +CONFIG: ConfigDescription = {} + + +def read_segments(config: Config, ef: ELFFile) -> SegmentDF: + """Read a segment table from an ELFFile.""" + columns = ['type', 'vaddress', 'paddress', 'size', 'flags'] + rows = [] + for segment in ef.iter_segments(): + rows.append([ + segment['p_type'], + segment['p_vaddr'], segment['p_paddr'], segment['p_memsz'], + segment['p_flags'] + ]) + return SegmentDF(rows, columns=columns) + + +def read_sections(config: Config, ef: ELFFile) -> SectionDF: + """Read a section table from an ELFFile.""" + columns = ['section', 'type', 'address', 'size', 'flags', 'segment'] + index = [] + rows = [] + for i, section in enumerate(ef.iter_sections()): + index.append(i) + segment_number = -1 + for j, segment in enumerate(ef.iter_segments()): + if segment.section_in_segment(section): + segment_number = j + break + rows.append([ + section.name, + elftools.elf.descriptions.describe_sh_type(section['sh_type']), + section['sh_addr'], section['sh_size'], section['sh_flags'], + segment_number + ]) + return SectionDF(rows, index=index, columns=columns) + + +def read_symbols(config: Config, ef: ELFFile, sections: SectionDF) -> SymbolDF: + """Read a symbol table from an ELFFile.""" + section_map = dict(sections.section) + section_map.update({ + 0: memdf.name.UNDEF, + 'SHN_UNDEF': memdf.name.UNDEF, + 'SHN_ABS': memdf.name.ABS + }) + columns = ['symbol', 'address', 'size', 'section', 'type', 'bind'] + rows = [] + for section_id, section in enumerate(ef.iter_sections()): + if not isinstance(section, elftools.elf.sections.SymbolTableSection): + continue + for symbol_id, symbol in enumerate(section.iter_symbols()): + st_type = elftools.elf.descriptions.describe_symbol_type( + symbol['st_info']['type']) + st_bind = elftools.elf.descriptions.describe_symbol_bind( + symbol['st_info']['bind']) + st_shndx = symbol['st_shndx'] # TBD: look up indirect segment ids + rows.append([ + symbol.name, # column: 'symbol' + symbol['st_value'], # column: 'address' + symbol['st_size'], # column: 'size' + section_map.get(st_shndx, + memdf.name.UNKNOWN), # column: 'section' + st_type, # column: 'type' + st_bind, # column: 'bind' + ]) + return SymbolDF(rows, columns=columns) + + +def cu_offset_to_path_map(config: Config, dwarf_info): + """Return a map from Dwarf compilation unit offsets to source paths.""" + prefixes = config.get_re('collect.prefix') + address_map = {} + for compilation_unit in dwarf_info.iter_CUs(): + path = pathlib.Path(compilation_unit.get_top_DIE().get_full_path()) + source = simplify_source(str(path.resolve()), prefixes) + address_map[compilation_unit.cu_offset] = source + return address_map + + +def read_file(config: Config, 
filename: str, method: str = None) -> DFs: + """Collect memory information using elftools.""" + with open(filename, 'rb') as fp: + ef = ELFFile(fp) + segments = read_segments(config, ef) + sections = read_sections(config, ef) + symbols = read_symbols(config, ef, sections) + + if config['args.need_cu']: + dwarf_info = ef.get_dwarf_info() + aranges = dwarf_info.get_aranges() + m = cu_offset_to_path_map(config, dwarf_info) + symbols['cu'] = symbols['address'].apply(lambda a: m.get( + aranges.cu_offset_at_addr(a), '')).astype('string') + + if config['args.tag_inputs']: + symbols['input'] = filename + + return { + SegmentDF.name: segments, + SectionDF.name: sections, + SymbolDF.name: symbols + } diff --git a/.github/scripts/memory/memdf/collector/readelf.py b/.github/scripts/memory/memdf/collector/readelf.py new file mode 100644 index 0000000..d2b1aaa --- /dev/null +++ b/.github/scripts/memory/memdf/collector/readelf.py @@ -0,0 +1,288 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Collect memory information using `readelf` and `nm`.""" + +import io +import re +from typing import Dict, List + +import elftools.elf.constants # type: ignore +import memdf.name +import memdf.util.subprocess +import pandas as pd # type: ignore +from memdf.collector.util import simplify_source +from memdf.df import DFs, SectionDF, SegmentDF, SymbolDF, SymbolSourceDF +from memdf.util.config import Config, ConfigDescription + +NM_CONFIG: ConfigDescription = { + 'tool.nm': { + 'help': 'File name of the nm executable', + 'metavar': 'FILE', + 'default': 'nm', + }, +} + +READELF_CONFIG: ConfigDescription = { + 'tool.readelf': { + 'help': 'File name of the readelf executable', + 'metavar': 'FILE', + 'default': 'readelf', + }, +} + +CONFIG: ConfigDescription = { + **NM_CONFIG, + **READELF_CONFIG, +} + + +def read_sources(config: Config, filename: str) -> SymbolSourceDF: + """Reads a binary's symbol-to-compilation-unit map using nm""" + # TBD: figure out how to get this via readelf. + prefixes = config.get_re('collect.prefix') + process = memdf.util.subprocess.run_tool_pipe(config, + ['nm', '-l', filename]) + if not process or not process.stdout: + return SymbolSourceDF() + text = io.TextIOWrapper(process.stdout) + + decoder = re.compile( + r"""^((?P
[0-9a-fA-F]+)|\s+?) + \s(?P\S) + \s(?P\S+) + (\t(?P\S+):(?P\d+))? + """, re.VERBOSE) + columns = ['address', 'kind', 'symbol', 'cu'] + rows = [] + while line := text.readline(): + if not (match := decoder.match(line.rstrip())): + continue + if a := match.group('address'): + address = int(a, 16) + else: + address = 0 + source = match.group('source') or '' + if source: + source = simplify_source(source, prefixes) + rows.append( + [address, + match.group('kind'), + match.group('symbol'), source]) + return SymbolSourceDF(rows, columns=columns) + + +def add_cu(config: Config, filename: str, symbols: SymbolDF) -> SymbolDF: + """Add a 'cu' (compilation unit) column to a symbol table.""" + sources = read_sources(config, filename).set_index(['symbol', 'address']) + symbols = pd.merge(symbols, sources, on=('symbol', 'address'), how='left') + symbols.fillna({'cu': ''}, inplace=True) + return symbols + + +def decode_section_flags(sflags: str) -> int: + """Map readelf's representation of section flags to ELF flag values.""" + d = { + 'W': elftools.elf.constants.SH_FLAGS.SHF_WRITE, + 'A': elftools.elf.constants.SH_FLAGS.SHF_ALLOC, + 'X': elftools.elf.constants.SH_FLAGS.SHF_EXECINSTR, + 'M': elftools.elf.constants.SH_FLAGS.SHF_MERGE, + 'S': elftools.elf.constants.SH_FLAGS.SHF_STRINGS, + 'I': elftools.elf.constants.SH_FLAGS.SHF_INFO_LINK, + 'L': elftools.elf.constants.SH_FLAGS.SHF_LINK_ORDER, + 'O': elftools.elf.constants.SH_FLAGS.SHF_OS_NONCONFORMING, + 'G': elftools.elf.constants.SH_FLAGS.SHF_GROUP, + 'T': elftools.elf.constants.SH_FLAGS.SHF_TLS, + 'C': 0x800, # SHF_COMPRESSED + 'E': elftools.elf.constants.SH_FLAGS.SHF_EXCLUDE, + 'y': 0x20000000, # SHF_ARM_PURECODE + } + flags = 0 + for k, v in d.items(): + if k in sflags: + flags |= v + return flags + + +def decode_segment_flags(sflags: str) -> int: + """Map readelf's representation of segment flags to ELF flag values.""" + return ((int('R' in sflags) << 2) | (int('W' in sflags) << 1) | + (int('E' in sflags))) + + +def read_segments(text: io.TextIOWrapper) -> SegmentDF: + """Read a segment table from readelf output.""" + decoder = re.compile( + r"""^(?P\w+) + \s+(?P0x[0-9a-fA-F]+) + \s+(?P0x[0-9a-fA-F]+) + \s+(?P0x[0-9a-fA-F]+) + \s+(?P0x[0-9a-fA-F]+) + \s+(?P0x[0-9a-fA-F]+) + \s(?P.*) + \s+0x(?P[0-9a-fA-F]+) + """, re.VERBOSE) + columns = ['type', 'vaddress', 'paddress', 'size', 'flags'] + rows = [] + while line := text.readline(): + if not (match := decoder.match(line.strip())): + break + rows.append([ + match.group('type'), + int(match.group('vaddress'), 16), + int(match.group('paddress'), 16), + int(match.group('size'), 16), + decode_segment_flags(match.group('flags')), + ]) + return SegmentDF(rows, columns=columns) + + +def read_section_to_segment(text: io.TextIOWrapper) -> Dict[str, int]: + """Read a section-to-segment map from readelf output.""" + section_to_segment = {} + while line := text.readline().strip(): + s = line.split() + segment = int(s[0], 10) + for section in s[1:]: + section_to_segment[section] = segment + return section_to_segment + + +def read_sections(text: io.TextIOWrapper) -> SectionDF: + """Read a section table from readelf output.""" + columns = ['section', 'type', 'address', 'size', 'flags'] + rows = [] + decoder = re.compile( + r"""^\[(?P[\s\d]+)\] + \s+(?P
<section>\S*) + \s+(?P<type>\S+) + \s+(?P<address>[0-9a-fA-F]+) + \s+(?P<offset>[0-9a-fA-F]+) + \s+(?P<size>[0-9a-fA-F]+) + \s+(?P<entsize>[0-9a-fA-F]+) + \s(?P<flags>.*) + \s(?P<link>\d+) + \s+(?P<info>\d+) + \s+(?P<align>\d+) + """, re.VERBOSE) + while line := text.readline(): + if not (match := decoder.match(line.strip())): + break + rows.append([ + match.group('section'), + match.group('type'), + int(match.group('address'), 16), + int(match.group('size'), 16), + decode_section_flags(match.group('flags')), + ]) + return SectionDF(rows, columns=columns) + + +def read_symbols(text: io.TextIOWrapper) -> SymbolDF: + """Read a symbol table from readelf output.""" + columns = ['symbol', 'address', 'size', 'type', 'bind', 'shndx'] + rows = [] + decoder = re.compile( + r"""^(?P<num>\d+): + \s+(?P<address>
[0-9a-fA-F]+) + \s+(?P\d+) + \s+(?P\S+) + \s+(?P\S+) + \s+(?P\S+) + \s+(?P\S+) + \s*(?P\S*) + """, re.VERBOSE) + while line := text.readline(): + if not (match := decoder.match(line.strip())): + break + symbol = match.group('symbol') + stype = match.group('type') + rows.append([ + symbol, + int(match.group('address'), 16), + int(match.group('size'), 10), + stype, + match.group('bind'), + match.group('shndx'), + ]) + return SymbolDF(rows, columns=columns) + + +def read_file(config: Config, filename: str, method: str = None) -> DFs: + """Read a binary's memory map using readelf.""" + process = memdf.util.subprocess.run_tool_pipe(config, [ + 'readelf', '--wide', '--segments', '--sections', '--symbols', filename + ]) + if not process or not process.stdout: + return SegmentDF() + segment_frames: List[SegmentDF] = [] + section_frames: List[SectionDF] = [] + symbol_frames: List[SymbolDF] = [] + section_to_segment = {} + text = io.TextIOWrapper(process.stdout) + while line := text.readline(): + line = line.strip() + if line.startswith('Section Headers'): + text.readline() + section_frames.append(read_sections(text)) + elif line.startswith('Program Headers'): + text.readline() + segment_frames.append(read_segments(text)) + elif line.startswith('Section to Segment'): + text.readline() + section_to_segment.update(read_section_to_segment(text)) + elif line.startswith('Symbol table'): + text.readline() + symbol_frames.append(read_symbols(text)) + + if segment_frames: + segments = SegmentDF(pd.concat(segment_frames, ignore_index=True)) + else: + segments = SegmentDF() + if section_frames: + sections = SectionDF(pd.concat(section_frames, ignore_index=True)) + else: + sections = SectionDF() + if symbol_frames: + symbols = SymbolDF(pd.concat(symbol_frames, ignore_index=True)) + else: + symbols = SymbolDF() + + # Add segment column to sections. + sections['segment'] = sections['section'].apply( + lambda s: section_to_segment.get(s, memdf.name.UNKNOWN)) + + # Add section name column to symbols. + section_map = {str(k): v for k, v in sections['section'].items()} + section_map.update({ + '0': memdf.name.UNDEF, + 'UND': memdf.name.UNDEF, + 'ABS': memdf.name.ABS + }) + if 'shndx' in symbols.columns: + symbols['section'] = symbols['shndx'].apply(lambda s: section_map.get( + s, memdf.name.UNKNOWN)) + symbols.drop(columns='shndx') + else: + symbols['section'] = '' + + if config['args.need_cu']: + symbols = add_cu(config, filename, symbols) + + return { + SegmentDF.name: segments, + SectionDF.name: sections, + SymbolDF.name: symbols + } diff --git a/.github/scripts/memory/memdf/collector/su.py b/.github/scripts/memory/memdf/collector/su.py new file mode 100644 index 0000000..cb84e3d --- /dev/null +++ b/.github/scripts/memory/memdf/collector/su.py @@ -0,0 +1,72 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Collect memory information from gcc .su files.""" + +import os +import os.path +import re +from typing import IO + +import pandas as pd # type: ignore +from memdf.df import DFs, StackDF +from memdf.util.config import Config, ConfigDescription + +CONFIG: ConfigDescription = {} + + +def read_su(config: Config, infile: IO) -> StackDF: + columns = ['symbol', 'type', 'size', 'file', 'line'] + rows = [] + decoder = re.compile( + r"""^(?P.+) + :(?P\d+) + :(?P\d+) + :(?P.+) + [\t](?P\d+) + [\t](?P\w+) + """, re.VERBOSE) + for line in infile: + if match := decoder.match(line.strip()): + rows.append([ + match.group('symbol'), + match.group('type'), + int(match.group('size')), + match.group('file'), + int(match.group('line')), + ]) + return StackDF(rows, columns=columns) + + +def read_file(config: Config, filename: str, method: str = None) -> DFs: + """Read a single `.su` file.""" + with open(filename, 'r') as fp: + return {StackDF.name: read_su(config, fp)} + + +def read_dir(config: Config, dirname: str, method: str = None) -> DFs: + """Walk a directory reading all `.su` files.""" + frames = [] + su_re = re.compile(r".+\.su") + for path, dirnames, filenames in os.walk(dirname): + for filename in filenames: + if su_re.fullmatch(filename): + with open(os.path.join(path, filename), 'r') as fp: + frames.append(read_su(config, fp)) + if frames: + df = StackDF(pd.concat(frames, ignore_index=True)) + else: + df = StackDF() + return {StackDF.name: df} diff --git a/.github/scripts/memory/memdf/collector/util.py b/.github/scripts/memory/memdf/collector/util.py new file mode 100644 index 0000000..f5d597a --- /dev/null +++ b/.github/scripts/memory/memdf/collector/util.py @@ -0,0 +1,35 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Miscellaneous collection utilities""" + +import os +import re +from typing import Optional + + +def simplify_source(source: str, prefixes: Optional[re.Pattern]) -> str: + """Simplify a source file path.""" + r = source + if prefixes: + if (m := prefixes.match(source)): + r = r[m.end():] + if r.startswith(os.path.sep): + r = r[len(os.path.sep):] + while r.startswith('..'): + r = r[2:] + if r.startswith(os.path.sep): + r = r[len(os.path.sep):] + return r diff --git a/.github/scripts/memory/memdf/df.py b/.github/scripts/memory/memdf/df.py new file mode 100644 index 0000000..a91e44c --- /dev/null +++ b/.github/scripts/memory/memdf/df.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""DataFrame utilities.""" + +from typing import Dict + +import numpy as np # type: ignore +import pandas as pd # type: ignore + + +class DF(pd.DataFrame): # pylint: disable=too-many-ancestors + """DataFrame builder with default columns and types.""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + for c in self.required: + if c not in self.columns: + self[c] = pd.Series() + types = {c: self.dtype[c] for c in self.columns if c in self.dtype} + typed_columns = list(types.keys()) + self[typed_columns] = self.astype(types, copy=False)[typed_columns] + self.attrs['name'] = self.name + + +class SymbolSourceDF(DF): # pylint: disable=too-many-ancestors + """Maps symbol to compilation unit""" + name: str = 'symbolsource' + required = frozenset(['symbol', 'address', 'cu']) + dtype = { + 'symbol': 'string', + 'address': np.int64, + 'cu': 'string', + 'line': np.int64, + } + + +class SegmentDF(DF): # pylint: disable=too-many-ancestors + """Segment memory map""" + name: str = 'segment' + required = frozenset(['type', 'vaddress', 'paddress', 'size']) + dtype = { + 'type': 'string', + 'vaddress': np.int64, + 'paddress': np.int64, + 'size': np.int64, + 'flags': np.int32 + } + + +class SectionDF(DF): # pylint: disable=too-many-ancestors + """Section memory map""" + name: str = 'section' + required = frozenset(['section', 'type', 'address', 'size']) + dtype = { + 'section': 'string', + 'type': 'string', + 'address': np.int64, + 'size': np.int64, + 'flags': np.int32, + 'segment': np.int32, + } + + +class SymbolDF(DF): # pylint: disable=too-many-ancestors + """Symbol table""" + name: str = 'symbol' + required = frozenset(['symbol', 'type', 'address', 'size']) + dtype = { + 'symbol': 'string', + 'type': 'string', + 'address': np.int64, + 'size': np.int64, + 'shndx': 'string' + } + + +class ExtentDF(DF): # pylint: disable=too-many-ancestors + """Gaps between symbols""" + name: str = 'gap' + required = frozenset(['address', 'size', 'section']) + dtype = { + 'address': np.int64, + 'size': np.int64, + 'section': 'string' + } + + +class StackDF(DF): # pylint: disable=too-many-ancestors + """Stack usage table""" + name: str = 'stack' + required = frozenset(['symbol', 'type', 'size']) + dtype = { + 'symbol': 'string', + 'type': 'string', + 'size': np.int64, + 'file': 'string', + 'line': np.int64, + } + + +def find_class(df: pd.DataFrame): + """Find a core DF subclass for a data frame. + + Given a arbitrary pandas DataFrame, determine whether it is usable + as one of the main memory map tables (symbol, section, segment) + by checking whether the required columns are present. + """ + if isinstance(df, DF): + return type(df) + for c in [SymbolDF, SectionDF, SegmentDF]: + if c.required.issubset(df.columns): + return c + return None + + +DFs = Dict[str, DF] diff --git a/.github/scripts/memory/memdf/name.py b/.github/scripts/memory/memdf/name.py new file mode 100644 index 0000000..2b80794 --- /dev/null +++ b/.github/scripts/memory/memdf/name.py @@ -0,0 +1,39 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Special section/symbol names.""" + +ABS = '*ABS*' +UNDEF = '*UNDEF*' +UNKNOWN = '*unknown*' +OTHER = '*other*' +TOTAL = '*total*' + +GAP_PREFIX = '*GAP_' +UNUSED_PREFIX = '*UNUSED_' +OVERLAP_PREFIX = '*OVERLAP_' + + +def gap(address: int, size: int) -> str: + return f'{GAP_PREFIX}{address:X}_{size:X}*' + + +def unused(address: int, size: int) -> str: + return f'{UNUSED_PREFIX}{address:X}_{size:X}*' + + +def overlap(address: int, size: int) -> str: + return f'{OVERLAP_PREFIX}{address:X}_{size:X}*' diff --git a/.github/scripts/memory/memdf/report.py b/.github/scripts/memory/memdf/report.py new file mode 100644 index 0000000..4e27aa2 --- /dev/null +++ b/.github/scripts/memory/memdf/report.py @@ -0,0 +1,508 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Memory DataFrame output and related utilities.""" + +import contextlib +import io +import json +import pathlib +import sys +from typing import IO, Any, Callable, Dict, List, Mapping, Optional, Protocol, Sequence, Union + +import cxxfilt # type: ignore +import memdf.df +import memdf.select +import memdf.util.pretty +import pandas as pd # type: ignore +from memdf import DF, Config, ConfigDescription, DFs +from memdf.util.config import ParseSizeAction + +REPORT_DEMANGLE_CONFIG: ConfigDescription = { + Config.group_map('report'): { + 'group': 'output' + }, + 'report.demangle': { + 'help': 'Demangle C++ symbol names', + 'default': False, + 'argparse': { + 'alias': ['--demangle', '-C'], + 'action': 'store_true', + }, + }, +} + +REPORT_LIMIT_CONFIG: ConfigDescription = { + Config.group_map('report'): { + 'group': 'output' + }, + 'report.limit': { + 'help': 'Limit display to items above the given size', + 'metavar': 'BYTES', + 'default': 0, + 'argparse': { + 'alias': ['--limit'], + 'action': ParseSizeAction, + }, + } +} + +REPORT_CONFIG: ConfigDescription = { + **REPORT_DEMANGLE_CONFIG, + **REPORT_LIMIT_CONFIG, +} + + +def postprocess_report_by(config: Config, key: str, info: Mapping) -> None: + """For --report-by=region, select all sections.""" + assert key == 'report.by' + if config.get(key) == 'region': + config.put('section.select-all', True) + + +REPORT_BY_CONFIG: ConfigDescription = { + 'report.by': { + 'help': 'Reporting group', + 'metavar': 'GROUP', + 'choices': memdf.select.SELECTION_CHOICES, + 'default': 'section', + 'argparse': { + 'alias': ['--by'], + }, + 'postprocess': postprocess_report_by, + }, +} + + +def demangle(symbol: str): + try: + symbol = cxxfilt.demangle(symbol, external_only=False) + except cxxfilt.InvalidName: + pass + return symbol + + +def 
hierify_rows(table: Sequence[Sequence[Any]]) -> List[List[Any]]: + if not table: + return table + persist = None + rows = [] + for row in table: + if persist is None: + persist = [None] * len(row) + new_persist = [] + new_row = [] + changed = False + for old, new in zip(persist, list(row)): + if not changed and isinstance(new, str) and new == old: + new_row.append('') + new_persist.append(old) + else: + changed = True + new_row.append(new) + new_persist.append(new) + rows.append(new_row) + persist = new_persist + return rows + + +def hierify(df: pd.DataFrame) -> pd.DataFrame: + columns = list(df.columns) + rows = hierify_rows(df.itertuples(index=False)) + r = pd.DataFrame(rows, columns=columns) + r.attrs = df.attrs + return r + + +# Output + +OUTPUT_FILE_CONFIG: ConfigDescription = { + Config.group_def('output'): { + 'title': 'output options', + }, + 'output.file': { + 'help': 'Output file', + 'metavar': 'FILENAME', + 'default': None, + 'argparse': { + 'alias': ['--output', '-O'], + }, + }, +} + + +def postprocess_output_metadata(config: Config, key: str, + info: Mapping) -> None: + """For --output-metadata=KEY:VALUE list, convert to dictionary.""" + assert key == 'output.metadata' + metadata = {} + for s in config.get(key): + if ':' in s: + k, v = s.split(':', 1) + else: + k, v = s, True + metadata[k] = v + config.put(key, metadata) + + +OutputOption = Union[IO, str, None] + + +@contextlib.contextmanager +def open_output(config: Config, + output: OutputOption = None, + suffix: Optional[str] = None): + if isinstance(output, io.IOBase): + yield output + return + if isinstance(output, str): + filename = output + else: + filename = config['output.file'] + if (not filename) or (filename == '-'): + yield sys.stdout + return + if suffix: + filename += suffix + f = open(filename, 'w') + yield f + f.close() + + +# Single-table writers. + +def write_nothing(config: Config, df: DF, output: IO, **_kwargs) -> None: + pass + + +def write_text(config: Config, df: DF, output: IO, **_kwargs) -> None: + """Write a memory usage data frame as a human-readable table.""" + memdf.util.pretty.debug(df) + if df.shape[0]: + df = df.copy() + last_column_is_left_justified = False + formatters = [] + for column in df.columns: + if pd.api.types.is_string_dtype(df.dtypes[column]): + df[column] = df[column].astype(str) + # Left justify strings. + width = max(len(column), df[column].str.len().max()) + formatters.append(lambda x: x.ljust(width)) + if column == df.columns[-1]: + last_column_is_left_justified = True + else: + formatters.append(str) + s = df.to_string(index=False, formatters=formatters, justify='left') + if last_column_is_left_justified: + # Strip trailing spaces. + for line in s.split('\n'): + print(line.rstrip()) + else: + print(s, file=output) + else: + # No rows. `df.to_string()` doesn't look like a text table in this case. 
+ print(' '.join(df.columns)) + + +def write_json(_config: Config, df: DF, output: IO, **kwargs) -> None: + """Write a memory usage data frame as json.""" + orient = kwargs.get('method', 'records') + # .removeprefix('json_') in 3.9 + if orient.startswith('json_'): + orient = orient[5:] + df.to_json(output, orient=orient) + + +def write_csv(_config: Config, df: DF, output: IO, **kwargs) -> None: + """Write a memory usage data frame in csv or tsv form.""" + keywords = ('sep', 'na_rep', 'float_format', 'columns', 'header', 'index', + 'index_label', 'quoting', 'quotechar', 'line_terminator', + 'date_format', 'doublequote', 'escapechar', 'decimal') + args = {k: kwargs[k] for k in keywords if k in kwargs} + df.to_csv(output, **args) + + +def write_markdown(_config: Config, df: DF, output: IO, **kwargs) -> None: + """Write a memory usage data frame as markdown.""" + keywords = ('index', 'headers', 'showindex', 'tablefmt', 'numalign', + 'stralign', 'disable_numparse', 'colalign', 'floatfmt') + args = {k: kwargs[k] for k in keywords if k in kwargs} + if 'tablefmt' not in args: + args['tablefmt'] = kwargs.get('method', 'pipe') + df.to_markdown(output, **args) + print(file=output) + + +# Multi-table writers. + +class DFsWriter(Protocol): + """Type checking for multiple table writers.""" + + def __call__(self, config: Config, dfs: DFs, output: OutputOption, + writer: Callable, **kwargs) -> None: + pass + + +dfname_count = 0 + + +def dfname(df: DF, k: str = 'unknown') -> str: + """Get a name for a data frame.""" + try: + return df.name + except AttributeError: + if c := memdf.df.find_class(df): + return c.name + global dfname_count + dfname_count += 1 + return k + str(dfname_count) + + +def write_one(config: Config, frames: DFs, output: OutputOption, + writer: Callable, **kw) -> None: + """Write a group of of memory usage data frames to a single file.""" + with open_output(config, output) as out: + sep = '' + for df in frames.values(): + print(end=sep, file=out) + if kw.get('title') and 'titlefmt' in kw and 'title' in df.attrs: + print(kw['titlefmt'].format(df.attrs['title']), file=out) + sep = '\n' + writer(config, df, out, **kw) + + +def write_many(config: Config, frames: DFs, output: OutputOption, + writer: Callable, **kwargs) -> None: + """Write a group of memory usage data frames to multiple files.""" + if (suffix := kwargs.get('suffix')) is None: + if isinstance(output, str) and (suffix := pathlib.Path(output).suffix): + pass + elif 'method' in kwargs: + suffix = '.' 
+ kwargs['method'] + else: + suffix = '' + for df in frames.values(): + name = dfname(df) + with open_output(config, output, f'-{name}{suffix}') as out: + writer(config, df, out, **kwargs) + + +def write_jsons(config: Config, frames: DFs, output: OutputOption, + writer: Callable, **kwargs) -> None: + """Write a group of memory usage data frames as a json dictionary.""" + with open_output(config, output) as out: + print('{', file=out) + if metadata := config['output.metadata']: + for k, v in metadata.items(): + print(f' {json.dumps(k)}: {json.dumps(v)},', file=out) + print(' "frames": ', file=out, end='') + sep = '{' + for df in frames.values(): + name = df.attrs.get('name', df.attrs.get('title', dfname(df))) + print(sep, file=out) + sep = ',' + print(f' {json.dumps(name)}: ', file=out, end='') + writer(config, df, out, indent=6, **kwargs) + print('}}', file=out) + + +def write_none(_config: Config, _frames: DFs, _output: OutputOption, + _writer: Callable, **_kwargs) -> None: + pass + + +def kwgetset(k: str, *args): + r = set() + for i in args: + r |= set(i.get(k, set())) + return r + + +def prep(config: Config, df: pd.DataFrame, kw: Dict) -> pd.DataFrame: + """Preprocess a table for output.""" + def each_column(k: str): + for column in set(df.attrs.get(k, set()) | kw.get(k, set())): + if column in df.columns: + yield column + + def maybe_copy(copied, df): + return (True, df if copied else df.copy()) + + copied = False + + if config['report.demangle']: + for column in each_column('demangle'): + copied, df = maybe_copy(copied, df) + df[column] = df[column].apply(demangle) + + for column in each_column('hexify'): + copied, df = maybe_copy(copied, df) + width = (int(df[column].max()).bit_length() + 3) // 4 + df[column] = df[column].apply( + lambda x: '{0:0{width}X}'.format(x, width=width)) + + if kw.get('hierify'): + df = hierify(df) + + return df + + +class Writer: + def __init__(self, + group: Callable, + single: Callable, + defaults: Optional[Dict] = None, + overrides: Optional[Dict] = None): + self.group = group + self.single = single + self.defaults = defaults or {} + self.overrides = overrides or {} + + def write_df(self, + config: Config, + frame: pd.DataFrame, + output: OutputOption = None, + **kwargs) -> None: + args = self._args(kwargs) + with open_output(config, output) as out: + self.single(config, prep(config, frame, args), out, **args) + + def write_dfs(self, + config: Config, + frames: DFs, + output: OutputOption = None, + **kwargs) -> None: + """Write a group of memory usage data frames.""" + args = self._args(kwargs) + frames = {k: prep(config, df, args) for k, df in frames.items()} + self.group(config, frames, output, self.single, **args) + + def _args(self, kw: Mapping) -> Dict: + r = self.defaults.copy() + r.update(kw) + r.update(self.overrides) + return r + + +class MarkdownWriter(Writer): + def __init__(self, + defaults: Optional[Dict] = None, + overrides: Optional[Dict] = None): + d = {'index': False} + d.update(defaults or {}) + super().__init__(write_one, write_markdown, d, overrides) + + +class JsonWriter(Writer): + def __init__(self, + defaults: Optional[Dict] = None, + overrides: Optional[Dict] = None): + super().__init__(write_jsons, write_json, defaults, overrides) + self.overrides['hierify'] = False + + +class CsvWriter(Writer): + def __init__(self, + defaults: Optional[Dict] = None, + overrides: Optional[Dict] = None): + d = {'index': False} + d.update(defaults or {}) + super().__init__(write_many, write_csv, d, overrides) + self.overrides['hierify'] = False + + 
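+# Map each --output-format name to the Writer that implements it. Most of the
+# tabulate-based formats share MarkdownWriter and differ only in the
+# 'tablefmt' that write_markdown passes through to DataFrame.to_markdown().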
+WRITERS: Dict[str, Writer] = { + 'none': Writer(write_none, write_nothing), + 'text': Writer(write_one, write_text, {'titlefmt': '\n{}\n'}), + 'json_split': JsonWriter(), + 'json_records': JsonWriter(), + 'json_index': JsonWriter(), + 'json_columns': JsonWriter(), + 'json_values': JsonWriter(), + 'json_table': JsonWriter(), + 'csv': CsvWriter({'sep': ','}), + 'tsv': CsvWriter({'sep': '\t'}), + 'plain': MarkdownWriter({'titlefmt': '\n{}\n'}), + 'simple': MarkdownWriter({'titlefmt': '\n{}\n'}), + 'grid': MarkdownWriter({'titlefmt': '\n\n'}), + 'fancy_grid': MarkdownWriter({'titlefmt': '\n\n'}), + 'html': MarkdownWriter({'titlefmt': '

<h2>{}</h2>'}), + 'unsafehtml': MarkdownWriter({'titlefmt': '<h2>{}</h2>
'}), + 'github': MarkdownWriter(), + 'pipe': MarkdownWriter(), + 'orgtbl': MarkdownWriter(), + 'jira': MarkdownWriter(), + 'presto': MarkdownWriter(), + 'pretty': MarkdownWriter(), + 'psql': MarkdownWriter(), + 'rst': MarkdownWriter(), + 'mediawiki': MarkdownWriter(), + 'moinmoin': MarkdownWriter(), + 'youtrack': MarkdownWriter(), + 'latex': MarkdownWriter(), + 'latex_raw': MarkdownWriter(), + 'latex_booktabs': MarkdownWriter(), + 'latex_longtable': MarkdownWriter(), + 'textile': MarkdownWriter(), +} + +OUTPUT_FORMAT_CONFIG: ConfigDescription = { + Config.group_def('output'): { + 'title': 'output options', + }, + 'output.format': { + 'help': f'Output format: one of {", ".join(WRITERS)}.', + 'metavar': 'FORMAT', + 'default': 'simple', + 'choices': list(WRITERS.keys()), + 'argparse': { + 'alias': ['--to', '-t'], + }, + }, + 'output.metadata': { + 'help': 'Metadata for JSON', + 'metavar': 'NAME:VALUE', + 'default': [], + 'argparse': { + 'alias': ['--metadata'] + }, + 'postprocess': postprocess_output_metadata, + } +} + +OUTPUT_CONFIG: ConfigDescription = { + **OUTPUT_FILE_CONFIG, + **OUTPUT_FORMAT_CONFIG, +} + + +def write_dfs(config: Config, + frames: DFs, + output: OutputOption = None, + method: Optional[str] = None, + **kwargs) -> None: + """Write a group of memory usage data frames.""" + kwargs['method'] = method or config['output.format'] + WRITERS[kwargs['method']].write_dfs(config, frames, output, **kwargs) + + +def write_df(config: Config, + frame: DF, + output: OutputOption = None, + method: Optional[str] = None, + **kwargs) -> None: + """Write a memory usage data frame.""" + kwargs['method'] = method or config['output.format'] + WRITERS[kwargs['method']].write_df(config, frame, output, **kwargs) diff --git a/.github/scripts/memory/memdf/select.py b/.github/scripts/memory/memdf/select.py new file mode 100644 index 0000000..3cb1b38 --- /dev/null +++ b/.github/scripts/memory/memdf/select.py @@ -0,0 +1,169 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Data frame selection utilities.""" + +from typing import Mapping, Optional + +import memdf.name +import memdf.util.config +import memdf.util.pretty +import numpy as np # type: ignore +from memdf import DF, Config, ConfigDescription + + +def split_size(config: Config, key: str) -> None: + """Split a name:size configuration value. + + When a program supports a size threshold for selection or summary, + this can be specificed for a particular item with a suffix on the + configuration, e.g. `--section=.text:16K`. + + Given a configuration key `col.select` referring to such a list of + arguments, this function strips any sizes from those arguments + and stores them as a name:size dictionary in `col.limit`. 
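+
+    For example, `--section=.text:16K --section=.bss` leaves
+    `section.select` as `['.text', '.bss']` and sets `section.limit`
+    to `{'.text': 16384}` (16 KiB, parsed by humanfriendly).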
+ """ + src = key.split('.') + dst = src[:-1] + ['limit'] + splits = [s.split(':') for s in config.getl(src, [])] + config.putl(src, [x[0] for x in splits]) + config.putl(dst, { + x[0]: memdf.util.config.parse_size(x[1]) + for x in splits if len(x) > 1 + }) + + +def get_limit(config: Config, column: str, name: str) -> int: + return config.getl([column, 'limit', name], config.get('report.limit', 0)) + + +def postprocess_selections(config: Config, key: str, info: Mapping) -> None: + """Resolve select/ignore command options.""" + split_size(config, key) + choice, select = key.split('.') + assert select == 'select' + selections = config.get(key) + if not config.getl([choice, 'ignore-all'], False): + if defaults := config.getl([choice, 'default']): + for i in config.getl([choice, 'ignore']): + if i in defaults: + defaults.remove(i) + selections += defaults + config.put(key, frozenset(selections)) + + +def select_and_ignore_config_desc(key: str) -> ConfigDescription: + return { + Config.group_map(key): { + 'group': 'select' + }, + f'{key}.select': { + 'help': + f'{key.capitalize()}(s) to process; otherwise all not ignored', + 'metavar': 'NAME', + 'default': [], + 'argparse': { + 'alias': [f'--{key}'], + }, + 'postprocess': postprocess_selections + }, + f'{key}.select-all': { + 'help': f'Select all {key}s', + 'default': False, + }, + key + '.ignore': { + 'help': f'{key.capitalize()}(s) to ignore', + 'metavar': 'NAME', + 'default': [], + }, + f'{key}.ignore-all': { + 'help': f'Ignore all {key}s unless explicitly selected', + 'default': False, + }, + } + + +SECTION_CONFIG = select_and_ignore_config_desc('section') +SYMBOL_CONFIG = select_and_ignore_config_desc('symbol') +REGION_CONFIG = select_and_ignore_config_desc('region') + +CONFIG: ConfigDescription = { + Config.group_def('select'): { + 'title': 'selection options', + }, + **SECTION_CONFIG, + **SYMBOL_CONFIG, + **REGION_CONFIG, +} + +COLLECTED_CHOICES = ['symbol', 'section'] +SYNTHETIC_CHOICES = ['region'] +SELECTION_CHOICES = COLLECTED_CHOICES + SYNTHETIC_CHOICES + + +def is_selected(config: Config, column, name) -> bool: + """Test `name` against the configured selection criteria for `column`.""" + if config.getl([column, 'select-all']): + return True + if name in config.getl([column, 'select'], []): + return True + return False + + +def synthesize_region(config: Config, df: DF, column: str) -> DF: + """Add a 'region' column derived from the 'section' column.""" + cmap = config.transpose_dictlist(config.get('region.sections', {})) + memdf.util.pretty.debug(cmap) + df[column] = df['section'].map(lambda x: cmap.get(x, memdf.name.UNKNOWN)) + return df + + +def groupby_region(df: DF): + return df[(df['size'] > 0) | (df['region'] != memdf.name.UNKNOWN)] + + +SYNTHESIZE = { + 'region': (synthesize_region, groupby_region), +} + + +def synthesize_column(config: Config, df: DF, column: str) -> DF: + if column not in df.columns: + SYNTHESIZE[column][0](config, df, column) + return df + + +def select_configured_column(config: Config, df: DF, column: str) -> DF: + """Apply configured selection options to a column""" + if column in df and not config.getl([column, 'select-all']): + selections = config.getl([column, 'select'], []) + if selections: + df = df.loc[df[column].isin(selections)] + return df + + +def select_configured(config: Config, df: DF, columns=SELECTION_CHOICES) -> DF: + for column in columns: + df = select_configured_column(config, df, column) + return df + + +def groupby(config: Config, df: DF, by: Optional[str] = None): + if not by: + by 
= config['report.by'] + df = df[[by, 'size']].groupby(by).aggregate(np.sum).reset_index() + if by in SYNTHESIZE: + df = SYNTHESIZE[by][1](df) + return df diff --git a/.github/scripts/memory/memdf/sizedb.py b/.github/scripts/memory/memdf/sizedb.py new file mode 100644 index 0000000..1228969 --- /dev/null +++ b/.github/scripts/memory/memdf/sizedb.py @@ -0,0 +1,253 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Sqlite3 database of binary sizes over time.""" + +import collections +import json +import logging +import sqlite3 +import zipfile +from pathlib import Path +from typing import IO, Dict, Iterable, List, Optional, Union + +import memdf.util.sqlite + +ChangeInfo = collections.namedtuple('ChangeInfo', [ + 'columns', 'rows', 'things', 'builds', 'stale_builds', 'artifacts', + 'stale_artifacts' +]) + + +class SizeDatabase(memdf.util.sqlite.Database): + """A database for recording and comparing size reports.""" + on_open = ["PRAGMA foreign_keys = ON", "PRAGMA encoding = 'UTF-8'"] + on_writable = [ + """ + -- A ‘thing’ identifies the kind of built object. + -- Builds of the same thing are comparable. + CREATE TABLE IF NOT EXISTS thing ( + id INTEGER PRIMARY KEY, + platform TEXT NOT NULL, -- Build platform + config TEXT NOT NULL, -- Build configuration discriminator + target TEXT NOT NULL, -- Build target + UNIQUE(platform, config, target) + ) + """, """ + -- A ‘build’ identifies a built instance of a thing at some point. + CREATE TABLE IF NOT EXISTS build ( + id INTEGER PRIMARY KEY, + thing_id INTEGER REFERENCES thing(id), + hash TEXT NOT NULL, -- Commit hash + parent TEXT NOT NULL, -- Parent commit hash + pr INTEGER DEFAULT 0, -- Github PR number + time INTEGER NOT NULL, -- Unix-epoch timestamp + artifact INTEGER DEFAULT 0, -- Github artifact ID + commented INTEGER DEFAULT 0, -- 1 if recorded in a GH comment + ref TEXT, -- Target git ref + event TEXT, -- Github build trigger event + UNIQUE(thing_id, hash, parent, pr, time, artifact) + ) + """, """ + -- A ‘size’ entry gives the size of an area for a particular build. + CREATE TABLE IF NOT EXISTS size ( + build_id INTEGER REFERENCES build(id), + kind TEXT NOT NULL, -- Area kind + name TEXT NOT NULL, -- Area name + size INTEGER NOT NULL, -- Size in bytes + PRIMARY KEY (build_id, name) + ) + """ + ] + + def __init__(self, filename: str, writable: bool = True): + super().__init__(filename, writable) + + def add_sizes(self, **kwargs): + """ + Add a size report to the database. + + The incoming arguments must contain the required non-ID column names + from ‘thing’ and ‘build’ tables, plus a 'sizes' entry that is a + sequence of mappings containing 'name' and 'size'. 
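+
+        For example (all values hypothetical):
+
+            db.add_sizes(platform='linux', config='release', target='light',
+                         hash='abc1234', parent='def5678', pr=1,
+                         time=1700000000, event='pull_request',
+                         sizes=[{'kind': 'section', 'name': '.text',
+                                 'size': 250000}])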
+ """ + td = {k: kwargs[k] for k in ('platform', 'config', 'target')} + thing = self.store_and_return_id('thing', **td) + bd = {k: kwargs[k] for k in ('hash', 'parent', 'time', 'event')} + if 'ref' in kwargs: + bd['ref'] = kwargs['ref'] + cd = {k: kwargs.get(k, 0) for k in ('pr', 'artifact', 'commented')} + build = self.store_and_return_id('build', thing_id=thing, **bd, **cd) + if build is None: + logging.error('Failed to store %s %s %s', thing, bd, cd) + else: + for d in kwargs['sizes']: + self.store('size', build_id=build, **d) + + def add_sizes_from_json(self, s: Union[bytes, str], origin: Dict): + """Add sizes from a JSON size report.""" + r = origin.copy() + r.update(json.loads(s)) + r['sizes'] = [] + # Add section and region sizes. + for frame in ['section', 'region']: + for i in r['frames'].get(frame, []): + r['sizes'].append({ + 'name': i[frame], + 'size': i['size'], + 'kind': frame + }) + self.add_sizes(**r) + + def add_sizes_from_zipfile(self, f: Union[IO, Path], origin: Dict): + """Add size reports from a zip.""" + with zipfile.ZipFile(f, 'r') as zip_file: + for i in zip_file.namelist(): + if i.endswith('-sizes.json'): + origin['member'] = i + with zip_file.open(i) as member: + self.add_sizes_from_json(member.read(), origin) + + def add_sizes_from_file(self, filename: str): + """Add size reports from a file.""" + origin = {'file': filename} + path = Path(filename) + if path.suffix == '.json': + logging.info('ASJ: reading JSON %s', path) + with open(path, encoding='utf-8') as f: + self.add_sizes_from_json(f.read(), origin) + elif path.suffix == '.zip': + logging.info('ASZ: reading ZIP %s', path) + self.add_sizes_from_zipfile(path, origin) + else: + logging.warning('Unknown file type "%s" ignored', filename) + + def select_thing_id(self, platform: str, config: str, + target: str) -> Optional[str]: + cur = self.execute( + 'SELECT id FROM thing WHERE platform=? AND config=? AND target=?', + (platform, config, target)) + row = cur.fetchone() + return row[0] if row else None + + def select_sections_for_thing(self, thing: str) -> List[str]: + cur = self.execute( + ''' + SELECT DISTINCT name FROM size WHERE build_id = ( + SELECT DISTINCT id FROM build WHERE thing_id == ?) + ORDER BY name + ''', (thing,)) + return [row[0] for row in cur.fetchall()] + + def select_matching_commits(self): + """Find matching builds, where one's commit is the other's parent.""" + return self.execute(''' + SELECT DISTINCT + c.event as event, + c.pr AS pr, + c.hash AS hash, + p.hash AS parent + FROM build c + INNER JOIN build p ON p.hash = c.parent + WHERE c.commented = 0 + ORDER BY c.time DESC, c.pr, c.hash, p.hash + ''') + + def select_changes(self, parent: str, commit: str) -> ChangeInfo: + """Returns size changes between the given commits.""" + cur = self.execute( + ''' + SELECT DISTINCT + t.id AS thing, + cb.artifact AS artifact, + pb.id AS parent_build, + cb.id AS commit_build, + t.platform, t.config, t.target, + cs.kind AS kind, + cs.name AS name, + ps.size AS parent_size, + cs.size AS commit_size, + cb.time AS time + FROM thing t + INNER JOIN build cb ON cb.thing_id = t.id + INNER JOIN build pb ON pb.thing_id = t.id AND pb.hash = cb.parent + INNER JOIN size cs ON cs.build_id = cb.id + INNER JOIN size ps ON ps.build_id = pb.id AND cs.name = ps.name + WHERE cb.hash = ? AND pb.hash = ? 
+ ORDER BY t.platform, t.config, t.target, + cs.name, cb.time DESC, pb.time DESC + ''', (commit, parent)) + + keep = ('platform', 'target', 'config', 'kind', 'name', 'parent_size', + 'commit_size') + things: set[int] = set() + artifacts: set[int] = set() + builds: set[int] = set() + stale_builds: set[int] = set() + stale_artifacts: set[int] = set() + previous: Optional[sqlite3.Row] = None + rows = [] + + for row in cur.fetchall(): + row = sqlite3.Row(cur, row) + things.add(row['thing']) + if (previous is not None and row['thing'] == previous['thing'] + and row['name'] == previous['name']): + # This is duplicate build, older because we sort descending, + # presumably from a partial workflow re-run. + if row['parent_build'] != previous['parent_build']: + stale_builds.add(row['parent_build']) + if row['commit_build'] != previous['commit_build']: + stale_builds.add(row['commit_build']) + stale_artifacts.add(row['artifact']) + else: + previous = row + new = [row[k] for k in keep] + parent_size = row['parent_size'] + commit_size = row['commit_size'] + new.append(commit_size - parent_size) + new.append(self.percent_change(parent_size, commit_size)) + rows.append(new) + artifacts.add(row['artifact']) + builds.add(row['commit_build']) + + return ChangeInfo(('platform', 'target', 'config', 'kind', 'section', + parent[:8], commit[:8], 'change', '% change'), rows, + things, builds, stale_builds, artifacts, + stale_artifacts) + + def set_commented(self, build_ids: Iterable[int]): + """Set the commented flag for the given builds.""" + if not build_ids: + return + for build_id in build_ids: + self.execute('UPDATE build SET commented = 1 WHERE id = ?', + (build_id, )) + self.commit() + + def delete_builds(self, build_ids: Iterable[int]): + """Delete the given builds.""" + if not build_ids: + return + for build_id in build_ids: + self.execute('DELETE FROM size WHERE build_id = ?', (build_id, )) + self.execute('DELETE FROM build WHERE id = ?', (build_id, )) + self.commit() + + @staticmethod + def percent_change(a: int, b: int) -> float: + if a == 0: + return 0.0 if b == 0 else float('inf') + return 100. * (b - a) / a diff --git a/.github/scripts/memory/memdf/util/__init__.py b/.github/scripts/memory/memdf/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.github/scripts/memory/memdf/util/config.py b/.github/scripts/memory/memdf/util/config.py new file mode 100644 index 0000000..fc159db --- /dev/null +++ b/.github/scripts/memory/memdf/util/config.py @@ -0,0 +1,312 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Configuration utilities for MDF tools""" + +import argparse +import ast +import logging +import re +from typing import Any, Dict, List, Mapping, MutableMapping, Optional, Pattern, Sequence, Tuple, Union + +import humanfriendly # type: ignore +import memdf.util.nd as nd +import memdf.util.pretty + +# A ConfigDescription is a declarative description of configuration options. 
+# +# In a description dictionary, (most) keys are configuration keys +# and values are dictionaries that MUST contain at least +# 'help': help string. +# 'default': default value. +# and may contain: +# 'metavar': if the command line argument takes a value +# 'choices': if the argument value must be one of several specific values +# 'argparse': additional argument parsing information; most of this is +# supplied as keyword arguments to `argparse.add_argument()`, +# except for: +# 'alias': list of alternate command line option names +# 'postprocess': a callable invoked after argument parsing with three +# arguments: the config, the key, and the description entry. +# +# Special keys can be used to control argument parser groups. By default any +# configuration key containing a ‘.’ belongs to a group determined by the +# key prefix (the part before the first ‘.’). +# Config.group_def(group): +# the value is supplied as keyword arguments to +# `argparse.add_argument_group()` +# Config.group_map(prefix): +# the value contains a key 'group', whose value is the group +# to be used for configuration keys with the given prefix. +# +ConfigDescription = Mapping[Union[str, Tuple[int, str]], Mapping[str, Any]] + + +class Config: + """Key/value store and argument parsing. + + A configuration key is a string where dots (`.`) separate levels in the + underlying nested dictionary. + + For functions that take a Config, an empty `Config()` is normally + acceptable. These functions should always assume reasonable defaults, + so that they can be used without any particular configuration. + """ + + def __init__(self): + self.d: MutableMapping = {} + self.argparse = None + self.argparse_groups = {} + self.group_alias = {} + self.postprocess_args = {} + self.config_desc: ConfigDescription = None + self.dest_to_key: MutableMapping = {} + self.key_to_dest: MutableMapping = {} + + # Basic config access + + def get(self, key: str, default: Any = None) -> Any: + return self.getl(key.split('.'), default) + + def __getitem__(self, key: str) -> Any: + """[] syntax for configuration. + + Note that this will return `None` for an unknown key, since the + absence of a configured value is not considered an error. + """ + return self.get(key) + + def getl(self, keys: nd.Key, default: Any = None) -> Any: + return nd.get(self.d, keys, default) + + def put(self, key: str, value: Any) -> None: + self.putl(key.split('.'), value) + + def __setitem__(self, key: str, value: Any) -> None: + self.put(key, value) + + def putl(self, keys: nd.Key, value: Any) -> None: + nd.put(self.d, keys, value) + + def update(self, src: Mapping) -> None: + nd.update(self.d, src) + + # Command line and config file reading + + _GROUP_DEF = 1 + _GROUP_MAP = 2 + + @staticmethod + def group_def(s: str) -> Tuple[int, str]: + return (Config._GROUP_DEF, s) + + @staticmethod + def group_map(s: str) -> Tuple[int, str]: + return (Config._GROUP_MAP, s) + + def init_config(self, desc: ConfigDescription) -> 'Config': + """Initialize a configuration from a description dictionary. + + Note that this initializes only the key/value store, + not anything associated with command line argument parsing. 
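+
+        For example, a description containing
+        `{'report.limit': {'help': '...', 'default': 0}}` seeds
+        `config['report.limit']` with `0`; argparse wiring is set up
+        separately by `init_args()`.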
+ """ + self.config_desc = desc + for key, info in desc.items(): + if isinstance(key, str): + self.put(key, info['default']) + + return self + + def init_args(self, desc: ConfigDescription, *args, **kwargs) -> 'Config': + """Initialize command line argument parsing.""" + self.argparse = argparse.ArgumentParser(*args, **kwargs) + + # Groups + for key, info in desc.items(): + if not isinstance(key, tuple): + continue + kind, name = key + if kind == self._GROUP_MAP: + self.group_alias[name] = info['group'] + elif kind == self._GROUP_DEF: + self.argparse_groups[name] = self.argparse.add_argument_group( + **info) + + # Arguments + for key, info in desc.items(): + if not isinstance(key, str): + continue + if (arg_info := info.get('argparse', {})) is False: + continue + + arg_info = arg_info.copy() + name = arg_info.pop('argument', '--' + key.replace('.', '-')) + names = [name] + arg_info.pop('alias', []) + info['names'] = names + for k in ['metavar', 'choices']: + if k in info: + arg_info[k] = info[k] + default = info['default'] + if not arg_info.get('action'): + if isinstance(default, list): + arg_info['action'] = 'append' + elif default is False: + arg_info['action'] = 'store_true' + elif default is True: + arg_info['action'] = 'store_false' + elif isinstance(default, int) and 'metavar' not in info: + arg_info['action'] = 'count' + if postprocess := info.get('postprocess'): + self.postprocess_args[key] = (postprocess, info) + + group: Optional[str] = info.get('group') + if group is None and (e := key.find('.')) > 0: + group = key[0:e] + group = self.group_alias.get(group, group) + arg_group = self.argparse_groups.get(group, self.argparse) + arg = arg_group.add_argument(*names, + help=info['help'], + default=self.get(key, default), + **arg_info) + self.dest_to_key[arg.dest] = key + self.key_to_dest[key] = arg.dest + + return self + + def init(self, desc: ConfigDescription, *args, **kwargs) -> 'Config': + """Intialize configuration from a configuration description.""" + self.init_config(desc) + self.init_args(desc, *args, **kwargs) + return self + + def parse(self, argv: Sequence[str]) -> 'Config': + """Parse command line options into a configuration dictionary.""" + + # Read config file(s). + config_parser = argparse.ArgumentParser(add_help=False, + allow_abbrev=False) + config_arg: Dict[str, Any] = { + 'metavar': 'FILE', + 'default': [], + 'action': 'append', + 'help': 'Read configuration FILE' + } + config_parser.add_argument('--config-file', **config_arg) + self.argparse.add_argument('--config-file', **config_arg) + config_args, argv = config_parser.parse_known_args(argv[1:]) + for filename in config_args.config_file: + self.read_config_file(filename) + + # Update argparser defaults. + defaults = {} + for dest, key in self.dest_to_key.items(): + default = self.get(key) + if default is not None: + defaults[dest] = default + self.argparse.set_defaults(**defaults) + + # Parse command line arguments and update config. + args = self.argparse.parse_args(argv) + for dest, value in vars(args).items(): + if (key := self.dest_to_key.get(dest)) is None: + key = 'args.' + dest + self.put(key, value) + + # Configure logging. + if self.get('log-level') is None: + verbose = self.get('verbose', 0) + self.put('log-level', + (logging.DEBUG if verbose > 1 else + logging.INFO if verbose else logging.WARNING)) + else: + self.put('log-level', + getattr(logging, self.get('log-level').upper())) + logging.basicConfig(level=self.get('log-level'), + format=self.get('log-format')) + + # Postprocess config. 
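+        # Each registered callable receives (config, key, description entry)
+        # and may rewrite the stored value; e.g. postprocess_output_metadata
+        # turns the NAME:VALUE list into a dictionary.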
+ for key, postprocess in self.postprocess_args.items(): + action, info = postprocess + action(self, key, info) + + memdf.util.pretty.debug(self.d) + return self + + def read_config_file(self, filename: str) -> 'Config': + """Read a configuration file.""" + with open(filename, 'r') as fp: + d = ast.literal_eval(fp.read()) + nd.update(self.d, d) + return self + + @staticmethod + def transpose_dictlist(src: Dict[str, List[str]]) -> Dict[str, str]: + d: Dict[str, str] = {} + for k, vlist in src.items(): + for v in vlist: + d[v] = k + return d + + def getl_re(self, key: nd.Key) -> Optional[Pattern]: + """Get a cached compiled regular expression for a config value list.""" + regex_key: nd.Key = ['cache', 're'] + key + regex: Optional[Pattern] = self.getl(regex_key) + if not regex: + branches: Optional[Sequence[str]] = self.getl(key) + if branches: + regex = re.compile('|'.join(branches)) + self.putl(regex_key, regex) + return regex + + def get_re(self, key: str) -> Optional[Pattern]: + return self.getl_re(key.split('.')) + + +# Argument parsing helpers + + +def parse_size(s: str) -> int: + return humanfriendly.parse_size(s, binary=True) if s else 0 + + +class ParseSizeAction(argparse.Action): + """argparse helper for humanfriendly sizes""" + + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, parse_size(values)) + + +# Config description of options shared by all tools. +CONFIG: ConfigDescription = { + 'log-level': { + 'help': + 'Set logging level: one of critical, error, warning, info, debug.', + 'default': None, + 'choices': ['critical', 'error', 'warning', 'info', 'debug'], + }, + 'log-format': { + 'help': 'Set logging format', + 'metavar': 'FORMAT', + 'default': '%(message)s', + }, + 'verbose': { + 'help': 'Show informational messages; repeat for debugging messages', + 'default': 0, + 'argparse': { + 'alias': ['-v'], + 'action': 'count', + }, + }, +} diff --git a/.github/scripts/memory/memdf/util/github.py b/.github/scripts/memory/memdf/util/github.py new file mode 100644 index 0000000..1c7e342 --- /dev/null +++ b/.github/scripts/memory/memdf/util/github.py @@ -0,0 +1,260 @@ +# +# Copyright (c) 2022 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Utility wrapper for GitHub operations.""" + +import itertools +import logging +import os +import subprocess +from typing import Iterable, Mapping, Optional + +import dateutil # type: ignore +import dateutil.parser # type: ignore +import ghapi.all # type: ignore +from memdf import Config, ConfigDescription + + +def postprocess_config(config: Config, _key: str, _info: Mapping) -> None: + """Postprocess --github-repository.""" + if config['github.repository']: + owner, repo = config.get('github.repository').split('/', 1) + config.put('github.owner', owner) + config.put('github.repo', repo) + if not config['github.token']: + config['github.token'] = os.environ.get('GITHUB_TOKEN') + if not config['github.token']: + logging.error('Missing --github-token') + + +CONFIG: ConfigDescription = { + Config.group_def('github'): { + 'title': 'github options', + }, + 'github.token': { + 'help': 'Github API token, or "SKIP" to suppress connecting to github', + 'metavar': 'TOKEN', + 'default': '', + 'argparse': { + 'alias': ['--github-api-token', '--token'], + }, + }, + 'github.repository': { + 'help': 'Github repostiory', + 'metavar': 'OWNER/REPO', + 'default': '', + 'argparse': { + 'alias': ['--repo'], + }, + 'postprocess': postprocess_config, + }, + 'github.dryrun-comment': { + 'help': "Don't actually post comments", + 'default': False, + }, + 'github.keep': { + 'help': "Don't remove PR artifacts", + 'default': False, + 'argparse': { + 'alias': ['--keep'], + }, + }, + 'github.limit-artifact-pages': { + 'help': 'Examine no more than COUNT pages of artifacts', + 'metavar': 'COUNT', + 'default': 0, + 'argparse': { + 'type': int, + }, + }, +} + + +class Gh: + """Utility wrapper for GitHub operations.""" + + def __init__(self, config: Config): + self.config = config + self.ghapi: Optional[ghapi.all.GhApi] = None + self.deleted_artifacts: set[int] = set() + + owner = config['github.owner'] + repo = config['github.repo'] + token = config['github.token'] + if owner and repo and token and token != 'SKIP': + self.ghapi = ghapi.all.GhApi(owner=owner, repo=repo, token=token) + + def __bool__(self): + return self.ghapi is not None + + def get_comments_for_pr(self, pr: int): + """Iterate PR comments.""" + assert self.ghapi + try: + return itertools.chain.from_iterable( + ghapi.all.paged(self.ghapi.issues.list_comments, pr)) + except Exception as e: + logging.error('Failed to get comments for PR #%d: %s', pr, e) + return [] + + def get_commits_for_pr(self, pr: int): + """Iterate PR commits.""" + assert self.ghapi + try: + return itertools.chain.from_iterable( + ghapi.all.paged(self.ghapi.pulls.list_commits, pr)) + except Exception as e: + logging.error('Failed to get commits for PR #%d: %s', pr, e) + return [] + + def get_artifacts(self, page_limit: int = -1, per_page: int = -1): + """Iterate artifact descriptions.""" + if page_limit < 0: + page_limit = self.config['github.limit-artifact-pages'] + if per_page < 0: + per_page = self.config['github.artifacts-per-page'] or 100 + + assert self.ghapi + try: + page = 0 + for i in ghapi.all.paged( + self.ghapi.actions.list_artifacts_for_repo, + per_page=per_page): + if not i.artifacts: + break + for a in i.artifacts: + yield a + page += 1 + logging.debug('ASP: artifact page %d of %d', page, page_limit) + if page_limit and page >= page_limit: + break + except Exception as e: + logging.error('Failed to get artifact list: %s', e) + + def get_size_artifacts(self, + page_limit: int = -1, + per_page: int = -1, + label: str = ''): + """Iterate size artifact descriptions.""" + 
for a in self.get_artifacts(page_limit, per_page): + # Size artifacts have names of the form: + # Size,{group},{pr},{commit_hash},{parent_hash}[,{event}] + # This information is added to the attribute record from GitHub. + if a.name.startswith('Size,') and a.name.count(',') >= 4: + _, group, pr, commit, parent, *etc = a.name.split(',') + if label and group != label: + continue + a.group = group + a.commit = commit + a.parent = parent + a.pr = pr + a.created_at = dateutil.parser.isoparse(a.created_at) + # Old artifact names don't include the event. + if etc: + event = etc[0] + else: + event = 'push' if pr == '0' else 'pull_request' + a.event = event + yield a + + def download_artifact(self, artifact_id: int): + """Download a GitHub artifact, returning a binary zip object.""" + logging.debug('Downloading artifact %d', artifact_id) + try: + assert self.ghapi + + # It seems like github artifact download is at least partially broken + # (see https://github.com/project-chip/connectedhomeip/issues/32656) + # + # This makes `self.ghapi.actions.download_artifact` not work + # + # Oddly enough downloading via CURL seems ok + owner = self.config['github.owner'] + repo = self.config['github.repo'] + token = self.config['github.token'] + + download_url = f"https://api.github.com/repos/{owner}/{repo}/actions/artifacts/{artifact_id}/zip" + + # Follow https://docs.github.com/en/rest/actions/artifacts?apiVersion=2022-11-28#download-an-artifact + return subprocess.check_output( + [ + 'curl', + '-L', + '-H', 'Accept: application/vnd.github+json', + '-H', f'Authorization: Bearer {token}', + '-H', 'X-GitHub-Api-Version: 2022-11-28', + '--output', '-', + download_url + ] + ) + except Exception as e: + logging.error('Failed to download artifact %d: %s', artifact_id, e) + return None + + def delete_artifact(self, artifact_id: int) -> bool: + """Delete a GitHub artifact.""" + if not artifact_id or artifact_id in self.deleted_artifacts: + return True + self.deleted_artifacts.add(artifact_id) + + if self.config['github.keep']: + logging.info('Suppressed deleting artifact %d', artifact_id) + return False + + try: + assert self.ghapi + logging.info('Deleting artifact %d', artifact_id) + self.ghapi.actions.delete_artifact(artifact_id) + return True + except Exception as e: + # During manual testing we sometimes lose the race against CI. 
+ logging.error('Failed to delete artifact %d: %s', artifact_id, e) + return False + + def delete_artifacts(self, artifacts: Iterable[int]): + for artifact_id in artifacts: + self.delete_artifact(artifact_id) + + def create_comment(self, issue_id: int, text: str) -> bool: + """Create a GitHub comment.""" + if self.config['github.dryrun-comment']: + logging.info('Suppressed creating comment on #%d', issue_id) + logging.debug('%s', text) + return False + + assert self.ghapi + logging.info('Creating comment on #%d', issue_id) + try: + self.ghapi.issues.create_comment(issue_id, text) + return True + except Exception as e: + logging.error('Failed to created comment on #%d: %s', issue_id, e) + return False + + def update_comment(self, comment_id: int, text: str) -> bool: + """Update a GitHub comment.""" + if self.config['github.dryrun-comment']: + logging.info('Suppressed updating comment #%d', comment_id) + logging.debug('%s', text) + return False + + logging.info('Updating comment #%d', comment_id) + try: + assert self.ghapi + self.ghapi.issues.update_comment(comment_id, text) + return True + except Exception as e: + logging.error('Failed to update comment %d: %s', comment_id, e) + return False diff --git a/.github/scripts/memory/memdf/util/markdown.py b/.github/scripts/memory/memdf/util/markdown.py new file mode 100644 index 0000000..d738059 --- /dev/null +++ b/.github/scripts/memory/memdf/util/markdown.py @@ -0,0 +1,41 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Markdown utilities.""" + + +def read_hierified(f): + """Read a markdown table in ‘hierified’ format.""" + + line = f.readline() + header = tuple((s.strip() for s in line.split('|')[1:-1])) + + _ = f.readline() # The line under the header. + + rows = [] + for line in f: + line = line.strip() + if not line: + break + row = [] + columns = line.split('|') + for i in range(0, len(header)): + column = columns[i + 1].strip() + if not column: + column = rows[-1][i] if rows else '(blank)' + row.append(column) + rows.append(tuple(row)) + + return (header, rows) diff --git a/.github/scripts/memory/memdf/util/nd.py b/.github/scripts/memory/memdf/util/nd.py new file mode 100644 index 0000000..7cc8331 --- /dev/null +++ b/.github/scripts/memory/memdf/util/nd.py @@ -0,0 +1,79 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Nested dictionary utilities.""" + +from typing import Any, Mapping, MutableMapping, Optional, Sequence + +Key = Sequence + + +def get(nd: Optional[Mapping], keys: Key, default: Any = None) -> Any: + """Get a value from a nested dictionary.""" + d: Any = nd + while d and keys: + d = d.get(keys[0]) + keys = keys[1:] + if d is not None and d != {}: + return d + if default is not None: + return default + return d + + +def put(nd: MutableMapping, keys: Key, value: Any) -> None: + """Store a value in a nested dictionary.""" + while True: + key = keys[0] + keys = keys[1:] + if not keys: + break + if key not in nd: + nd[key] = {} + nd = nd[key] + nd[key] = value + + +def store(nd: MutableMapping, keys: Key, value: Any, empty: Any, add) -> None: + """Store a value in a nested dictionary where the leaves are containers.""" + while True: + key = keys[0] + keys = keys[1:] + if not keys: + break + if key not in nd: + nd[key] = {} + nd = nd[key] + if key not in nd: + nd[key] = empty + add(nd[key], value) + + +def update(nd: MutableMapping, src: Mapping) -> None: + """Update a nested dictionary.""" + for k, v in src.items(): + if k not in nd or nd[k] is None: + nd[k] = v + elif isinstance(nd[k], dict) and isinstance(v, dict): + update(nd[k], v) + elif isinstance(nd[k], list): + if isinstance(v, list): + nd[k] += v + else: + nd[k].append(v) + elif type(nd[k]) is type(v): + nd[k] = v + else: + raise TypeError(f"type mismatch {k},{v} was {nd[k]}") diff --git a/.github/scripts/memory/memdf/util/pretty.py b/.github/scripts/memory/memdf/util/pretty.py new file mode 100644 index 0000000..ed0cd23 --- /dev/null +++ b/.github/scripts/memory/memdf/util/pretty.py @@ -0,0 +1,34 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Pretty print logging.""" + +import logging +import pprint +from typing import Any + + +def log(level: int, x: Any) -> None: + if logging.getLogger(None).isEnabledFor(level): + for line in pprint.pformat(x).split('\n'): + logging.log(level, line) + + +def info(x: Any) -> None: + log(logging.INFO, x) + + +def debug(x: Any) -> None: + log(logging.DEBUG, x) diff --git a/.github/scripts/memory/memdf/util/sqlite.py b/.github/scripts/memory/memdf/util/sqlite.py new file mode 100644 index 0000000..64c32e0 --- /dev/null +++ b/.github/scripts/memory/memdf/util/sqlite.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Wrapper and utility functions around sqlite3""" + +import sqlite3 +from typing import List, Optional + +import pandas as pd # type: ignore +from memdf import Config, ConfigDescription + +CONFIG: ConfigDescription = { + Config.group_def('database'): { + 'title': 'database options', + }, + 'database.file': { + 'help': 'Sqlite3 file', + 'metavar': 'FILENAME', + 'default': None, + 'argparse': { + 'alias': ['--db'], + }, + }, +} + + +class Database: + """Wrapper and utility functions around sqlite3""" + on_open: Optional[List[str]] = None + on_writable: Optional[List[str]] = None + + def __init__(self, filename: str, writable: bool = True): + self.filename = filename + self.writable = writable + self.con: Optional[sqlite3.Connection] = None + + def __enter__(self): + return self.open() + + def __exit__(self, et, ev, traceback): + self.close() + return False + + def open(self): + """Open and initialize the database connection.""" + if not self.con: + db = 'file:' + self.filename + if not self.writable: + db += '?mode=ro' + self.con = sqlite3.connect(db, uri=True) + if self.on_open: + for i in self.on_open: + self.con.execute(i) + if self.writable and self.on_writable: + for i in self.on_writable: + self.con.execute(i) + return self + + def close(self): + if self.con: + self.con.close() + self.con = None + return self + + def connection(self) -> sqlite3.Connection: + assert self.con + return self.con + + def execute(self, query, parameters=None): + if parameters: + return self.con.execute(query, parameters) + return self.con.execute(query) + + def commit(self): + self.con.commit() + return self + + def store(self, table: str, **kwargs): + """Insert the data if it does not already exist.""" + q = (f"INSERT INTO {table} ({','.join(kwargs.keys())})" + f" VALUES ({','.join('?' * len(kwargs))})" + f" ON CONFLICT DO NOTHING") + v = list(kwargs.values()) + self.connection().execute(q, v) + + def get_matching(self, table: str, columns: List[str], **kwargs): + q = (f"SELECT {','.join(columns)} FROM {table}" + f" WHERE {'=? AND '.join(kwargs.keys())}=?") + v = list(kwargs.values()) + return self.connection().execute(q, v) + + def get_matching_id(self, table: str, **kwargs): + cur = self.get_matching(table, ['id'], **kwargs) + row = cur.fetchone() + if row: + return row[0] + return None + + def store_and_return_id(self, table: str, **kwargs) -> Optional[int]: + self.store(table, **kwargs) + return self.get_matching_id(table, **kwargs) + + def data_frame(self, query, parameters=None) -> pd.DataFrame: + """Return the results of a query as a DataFrame.""" + cur = self.execute(query, parameters) + columns = [i[0] for i in cur.description] + df = pd.DataFrame(cur.fetchall(), columns=columns) + self.commit() + df.attrs = {'title': query} + return df diff --git a/.github/scripts/memory/memdf/util/subprocess.py b/.github/scripts/memory/memdf/util/subprocess.py new file mode 100644 index 0000000..8901d19 --- /dev/null +++ b/.github/scripts/memory/memdf/util/subprocess.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Subprocess utilities."""
+
+import logging
+import subprocess
+from typing import List
+
+from memdf.util.config import Config
+
+
+def run_tool_pipe(config: Config, command: List[str]) -> subprocess.Popen:
+    """Run a command."""
+    if tool := config.getl(['tool', command[0]]):
+        command[0] = tool
+    logging.info('Execute: %s', ' '.join(command))
+    return subprocess.Popen(command, stdout=subprocess.PIPE)
diff --git a/.github/scripts/memory/report_summary.py b/.github/scripts/memory/report_summary.py
new file mode 100644
index 0000000..04508c8
--- /dev/null
+++ b/.github/scripts/memory/report_summary.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2021 Project CHIP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Generate a summary of memory use.
+
+This program reads memory usage information and produces aggregated totals
+according to the `--report-by` option:
+* `--report-by section` (the default) aggregates memory usage by section.
+* `--report-by region` aggregates memory usage by region. A region is a
+  configuration-defined group of sections.
+
+Use `--collect-method=help` to see available collection methods.
+Use `--output-format=help` to see available output formats.
+"""
+
+import sys
+
+import memdf.collect
+import memdf.report
+import memdf.select
+from memdf import Config, DFs, SymbolDF
+
+
+def main(argv):
+    status = 0
+    try:
+        config: Config = memdf.collect.parse_args(
+            {
+                **memdf.select.CONFIG,
+                **memdf.report.REPORT_CONFIG,
+                **memdf.report.REPORT_BY_CONFIG,
+                **memdf.report.OUTPUT_CONFIG,
+            }, argv)
+        dfs: DFs = memdf.collect.collect_files(config)
+
+        symbols = dfs[SymbolDF.name]
+        summary = memdf.select.groupby(config, symbols)
+        memdf.report.write_dfs(config, {SymbolDF.name: summary})
+
+    except Exception as exception:
+        raise exception
+
+    return status
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
\ No newline at end of file
diff --git a/.github/workflows/bloat-check.yaml b/.github/workflows/bloat-check.yaml
new file mode 100644
index 0000000..eb4e725
--- /dev/null
+++ b/.github/workflows/bloat-check.yaml
@@ -0,0 +1,55 @@
+# Copyright (c) 2020-2025 Project CHIP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +name: Bloat Check +on: + workflow_dispatch: + schedule: + - cron: "*/5 * * * *" + +concurrency: + group: ${{ github.workflow }} + # Don't cancel an already-running bloat check just because it took more + # than 5 minutes to run and our cron job is trying to schedule a new one. + cancel-in-progress: false + +jobs: + pull_request_update: + name: Report on pull requests + + # Don't run on forked repos + if: github.repository_owner == 'sysgrok' + + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v5 + - name: Set up environment for size reports + uses: ./.github/actions/setup-size-reports + if: ${{ !env.ACT }} + with: + gh-context: ${{ toJson(github) }} + - name: Report + run: | + python .github/scripts/memory/gh_report.py \ + --verbose \ + --report-increases 0.2 \ + --report-pr \ + --github-comment \ + --github-limit-artifact-pages 50 \ + --github-limit-artifacts 500 \ + --github-limit-comments 20 \ + --github-repository project-chip/rs-matter \ + --github-api-token "${{ secrets.GITHUB_TOKEN }}" \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 14e198e..a59cc5f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,4 +54,28 @@ jobs: - name: Examples if: matrix.features == 'os' - run: cargo build --examples --features ${{matrix.features}},nix,log,examples + run: cargo build --release --examples --features ${{matrix.features}},nix,log,examples + + - name: Set up environment for size reports + uses: ./.github/actions/setup-size-reports + if: ${{ !env.ACT && matrix.features == 'os' }} + with: + gh-context: ${{ toJson(github) }} + + - name: Prepare bloat report from the previous builds + if: matrix.features == 'os' + run: | + python .github/scripts/memory/gh_sizes.py \ + light infologs-optz-ltofat x86_64-unknown-linux-gnu \ + target/release/examples/light \ + /tmp/bloat_reports/ + python .github/scripts/memory/gh_sizes.py \ + light_eth infologs-optz-ltofat x86_64-unknown-linux-gnu \ + target/release/examples/light_eth \ + /tmp/bloat_reports/ + + - name: Uploading Size Reports + uses: ./.github/actions/upload-size-reports + if: ${{ !env.ACT }} + with: + platform-name: cross-platform diff --git a/Cargo.toml b/Cargo.toml index 0dbf923..e14173d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,13 +20,19 @@ rs-matter = { git = "https://github.com/sysgrok/rs-matter", branch = "next" } #edge-nal-std = { git = "https://github.com/sysgrok/edge-net" } #edge-mdns = { git = "https://github.com/sysgrok/edge-net" } -[profile.release] -opt-level = "s" - [profile.dev] debug = true opt-level = "z" +[profile.release] +opt-level = "z" +debug = 0 +debug-assertions = false +codegen-units = 1 # LLVM can perform better optimizations using a single thread +lto = "fat" +incremental = false +overflow-checks = false + [features] default = []
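
A rough usage sketch of the nested-dictionary helpers copied in under memdf/util/ (illustrative only, not part of the patch; the module is assumed to be importable as memdf.util.nd, a path not shown in this hunk). It shows how put()/get() address values by key sequence and how update() merges dicts recursively while extending lists:

    # Illustrative only: exercises get()/put()/update() as defined in the patch.
    import memdf.util.nd as nd   # assumed module path

    config = {}
    nd.put(config, ['output', 'format'], 'text')   # -> {'output': {'format': 'text'}}
    nd.put(config, ['output', 'file'], '-')

    assert nd.get(config, ['output', 'format']) == 'text'
    assert nd.get(config, ['output', 'missing'], default='none') == 'none'

    # update() merges nested dicts and extends existing lists instead of replacing them.
    nd.update(config, {'output': {'columns': ['size']}})
    nd.update(config, {'output': {'columns': ['name']}})
    assert nd.get(config, ['output', 'columns']) == ['size', 'name']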
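The Database class in memdf/util/sqlite.py is a thin convenience layer over sqlite3; gh_report.py drives it through memdf.sizedb.SizeDatabase. A minimal standalone sketch of the wrapper itself (illustrative only, assuming .github/scripts/memory is on PYTHONPATH; the 'build' table and its columns are hypothetical and created inline just for the example):

    # Illustrative only: hypothetical table, in-memory database.
    from memdf.util.sqlite import Database

    with Database(':memory:') as db:
        db.connection().execute(
            'CREATE TABLE build (id INTEGER PRIMARY KEY, hash TEXT UNIQUE, size INTEGER)')
        # store() issues INSERT ... ON CONFLICT DO NOTHING, so repeats are idempotent.
        build_id = db.store_and_return_id('build', hash='abc123', size=420000)
        again = db.store_and_return_id('build', hash='abc123', size=420000)
        assert build_id == again
        db.commit()
        # data_frame() returns any query result as a pandas DataFrame.
        print(db.data_frame('SELECT hash, size FROM build'))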
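run_tool_pipe() in memdf/util/subprocess.py returns a Popen with stdout piped and rewrites command[0] when the config carries a tool override for that name (e.g. pointing 'nm' at a cross-toolchain binary). A sketch of consuming it (illustrative only; `config` is assumed to be an already-initialized memdf Config, whose construction lives in memdf.util.config and is outside this diff):

    # Illustrative only: dump_symbols is a hypothetical helper, not part of memdf.
    from memdf.util.subprocess import run_tool_pipe

    def dump_symbols(config, binary: str) -> None:
        # If the config defines a ['tool']['nm'] entry, that path replaces 'nm' here.
        proc = run_tool_pipe(config, ['nm', '--print-size', binary])
        assert proc.stdout is not None
        for raw in proc.stdout:              # stdout=subprocess.PIPE yields bytes lines
            print(raw.decode(errors='replace').rstrip())
        proc.wait()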