mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-04 20:19:47 +08:00

The undefined logic is complex and has lots of magic on it. Implement it, using the same algorithm we have at get_abi.pl. Yet, some tweaks to optimize performance and to make the code simpler were added here: - at the perl version, the tree graph had loops, so we had to use BFS to traverse it. On this version, the graph is a tree, which simplifies building the "what" group for sysfs aliases; - the logic which splits regular expressions into subgroups was re-written to make it faster; - it may optionally use multiple processes to search for symbol matches; - it has some additional debug levels. Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet <corbet@lwn.net> Link: https://lore.kernel.org/r/1529c255845d117696d5af57d8dc05554663afdf.1739182025.git.mchehab+huawei@kernel.org
379 lines
12 KiB
Python
379 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
# pylint: disable=R0902,R0912,R0914,R0915,R1702
|
|
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
|
|
# SPDX-License-Identifier: GPL-2.0
|
|
|
|
"""
|
|
Parse ABI documentation and produce results from it.
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
from concurrent import futures
|
|
from datetime import datetime
|
|
from random import shuffle
|
|
|
|
from helpers import AbiDebug
|
|
|
|
class SystemSymbols:
    """
    Graph of sysfs file paths plus helpers to match them against ABI docs.

    Constructor arguments are kept as instance attributes; the scanned
    sysfs tree is stored as a nested-dict graph under self.root.
    """

    def graph_add_file(self, path, link=None):
        """
        Add a file path to the sysfs graph stored at self.root.

        Every path component becomes one nested dict level. Each node
        keeps, under the "__name" key, the list of names reaching it:
        the real path first, followed by any symlink aliases.
        """

        # Nothing to do for a path that was already recorded (note that
        # a new alias for an already-seen path is dropped here too).
        if path in self.files:
            return

        partial = ""
        node = self.root
        for component in path.split("/"):
            partial += component + "/"
            if component not in node:
                node[component] = {"__name": [partial.rstrip("/")]}
            node = node[component]

        # Remember the symlink name on the leaf node, avoiding duplicates.
        if link:
            aliases = node["__name"]
            if link not in aliases:
                aliases.append(link.rstrip("/"))

        self.files.add(path)
|
|
|
|
    def print_graph(self, root_prefix="", root=None, level=0):
        """Prints a reference tree graph using UTF-8 characters"""

        # No subtree given: start from the top of the stored graph.
        if not root:
            root = self.root
            level = 0

        # Prevent endless traverse
        if level > 5:
            return

        # Intermediate children get a tee connector, the last child an
        # elbow; the top level prints flush-left with no connector.
        if level > 0:
            prefix = "├──"
            last_prefix = "└──"
        else:
            prefix = ""
            last_prefix = ""

        items = list(root.items())

        # "__name" holds [real_path, alias, ...]; extra entries are links.
        names = root.get("__name", [])
        for k, edge in items:
            if k == "__name":
                continue

            # An empty component comes from the path's leading "/".
            if not k:
                k = "/"

            # Show symlink aliases next to the node name.
            if len(names) > 1:
                k += " links: " + ",".join(names[1:])

            # NOTE(review): last-child detection compares subtree values
            # for equality, not identity; two equal subtrees would both
            # render with the last-child glyph — confirm intended.
            if edge == items[-1][1]:
                print(root_prefix + last_prefix + k)
                p = root_prefix
                if level > 0:
                    # NOTE(review): continuation pad width looks mangled by
                    # transcription — upstream likely pads to the connector
                    # width here; confirm against the original file.
                    p += " "
                self.print_graph(p, edge, level + 1)
            else:
                print(root_prefix + prefix + k)
                p = root_prefix + "│ "
                self.print_graph(p, edge, level + 1)
|
|
|
|
def _walk(self, root):
|
|
"""
|
|
Walk through sysfs to get all devnodes that aren't ignored.
|
|
|
|
By default, uses /sys as sysfs mounting point. If another
|
|
directory is used, it replaces them to /sys at the patches.
|
|
"""
|
|
|
|
with os.scandir(root) as obj:
|
|
for entry in obj:
|
|
path = os.path.join(root, entry.name)
|
|
if self.sysfs:
|
|
p = path.replace(self.sysfs, "/sys", count=1)
|
|
else:
|
|
p = path
|
|
|
|
if self.re_ignore.search(p):
|
|
return
|
|
|
|
# Handle link first to avoid directory recursion
|
|
if entry.is_symlink():
|
|
real = os.path.realpath(path)
|
|
if not self.sysfs:
|
|
self.aliases[path] = real
|
|
else:
|
|
real = real.replace(self.sysfs, "/sys", count=1)
|
|
|
|
# Add absfile location to graph if it doesn't exist
|
|
if not self.re_ignore.search(real):
|
|
# Add link to the graph
|
|
self.graph_add_file(real, p)
|
|
|
|
elif entry.is_file():
|
|
self.graph_add_file(p)
|
|
|
|
elif entry.is_dir():
|
|
self._walk(path)
|
|
|
|
    def __init__(self, abi, sysfs="/sys", hints=False):
        """
        Initialize internal variables and get a list of all files inside
        sysfs that can currently be parsed.

        Please notice that there are several entries on sysfs that aren't
        documented as ABI. Ignore those.

        The real paths will be stored under self.files. Aliases will be
        stored in separate, as self.aliases.
        """

        # abi is the parsed-ABI object providing get_regexes(), debug
        # flags, re_string and a logger.
        self.abi = abi
        self.log = abi.log

        # Keep self.sysfs only for a non-default mount point; _walk()
        # uses it to map paths back to the canonical /sys prefix.
        if sysfs != "/sys":
            self.sysfs = sysfs.rstrip("/")
        else:
            self.sysfs = None

        # When True, report which regexes were tested for missing symbols.
        self.hints = hints

        # Nested-dict graph of the sysfs tree ("__name" key per node).
        self.root = {}
        # Maps symlink path -> resolved real path (default mount only).
        self.aliases = {}
        # Every real path added to the graph, for de-duplication.
        self.files = set()

        dont_walk = [
            # Those require root access and aren't documented at ABI
            f"^{sysfs}/kernel/debug",
            f"^{sysfs}/kernel/tracing",
            f"^{sysfs}/fs/pstore",
            f"^{sysfs}/fs/bpf",
            f"^{sysfs}/fs/fuse",

            # This is not documented at ABI
            f"^{sysfs}/module",

            f"^{sysfs}/fs/cgroup",  # this is big and has zero docs under ABI
            f"^{sysfs}/firmware",   # documented elsewhere: ACPI, DT bindings
            "sections|notes",       # aren't actually part of ABI

            # kernel-parameters.txt - not easy to parse
            "parameters",
        ]

        # NOTE(review): these patterns embed the raw *sysfs* argument
        # (not rstrip()ed, unlike self.sysfs above), while _walk() tests
        # them against paths whose prefix was already mapped to "/sys".
        # With a custom mount point the anchored patterns may never
        # match — confirm intended.
        self.re_ignore = re.compile("|".join(dont_walk))

        print(f"Reading {sysfs} directory contents...", file=sys.stderr)
        self._walk(sysfs)
|
|
|
|
    def check_file(self, refs, found):
        """
        Check missing ABI symbols for a given sysfs file.

        refs:  iterable of name groups; each group lists the names
               ([real_path, aliases...]) that reach one sysfs node,
               with the real path first.
        found: when truthy, also record a message for matched symbols.

        Returns a list of dicts with "found", "fname" and "msg" keys,
        one per group.
        """

        res_list = []

        # Worker chunks may be interrupted with ^C; return whatever was
        # gathered so far instead of propagating the interrupt.
        try:
            for names in refs:
                fname = names[0]

                res = {
                    "found": False,
                    "fname": fname,
                    "msg": "",
                }
                res_list.append(res)

                # Regexes derived from the ABI "What:" entries for fname.
                re_what = self.abi.get_regexes(fname)
                if not re_what:
                    self.abi.log.warning(f"missing rules for {fname}")
                    continue

                # A node counts as found if any of its names (real path
                # or alias) matches any ABI regex.
                for name in names:
                    for r in re_what:
                        if self.abi.debug & AbiDebug.UNDEFINED:
                            self.log.debug("check if %s matches '%s'", name, r.pattern)
                        if r.match(name):
                            res["found"] = True
                            if found:
                                res["msg"] += f" {fname}: regex:\n\t"
                            # NOTE(review): this continues with the next
                            # regex, not the next name, so the message may
                            # be appended once per matching regex —
                            # confirm a "break" wasn't intended.
                            continue

                # Optionally report every regex that was tried and failed.
                if self.hints and not res["found"]:
                    res["msg"] += f" {fname} not found. Tested regexes:\n"
                    for r in re_what:
                        res["msg"] += " " + r.pattern + "\n"

        except KeyboardInterrupt:
            pass

        return res_list
|
|
|
|
def _ref_interactor(self, root):
|
|
"""Recursive function to interact over the sysfs tree"""
|
|
|
|
for k, v in root.items():
|
|
if isinstance(v, dict):
|
|
yield from self._ref_interactor(v)
|
|
|
|
if root == self.root or k == "__name":
|
|
continue
|
|
|
|
if self.abi.re_string:
|
|
fname = v["__name"][0]
|
|
if self.abi.re_string.search(fname):
|
|
yield v
|
|
else:
|
|
yield v
|
|
|
|
|
|
def get_fileref(self, all_refs, chunk_size):
|
|
"""Interactor to group refs into chunks"""
|
|
|
|
n = 0
|
|
refs = []
|
|
|
|
for ref in all_refs:
|
|
refs.append(ref)
|
|
|
|
n += 1
|
|
if n >= chunk_size:
|
|
yield refs
|
|
n = 0
|
|
refs = []
|
|
|
|
yield refs
|
|
|
|
    def check_undefined_symbols(self, max_workers=None, chunk_size=50,
                                found=None, dry_run=None):
        """Search ABI for sysfs symbols missing documentation"""

        self.abi.parse_abi()

        if self.abi.debug & AbiDebug.GRAPH:
            self.print_graph()

        # Collect the name group ([real_path, aliases...]) of every
        # sysfs node to be checked.
        all_refs = []
        for ref in self._ref_interactor(self.root):
            all_refs.append(ref["__name"])

        if dry_run:
            print("Would check", file=sys.stderr)
            for ref in all_refs:
                print(", ".join(ref))

            return

        print("Starting to search symbols (it may take several minutes):",
              file=sys.stderr)
        start = datetime.now()
        old_elapsed = None

        # Python doesn't support multithreading due to limitations on its
        # global lock (GIL). While Python 3.13 finally made GIL optional,
        # there are still issues related to it. Also, we want to have
        # backward compatibility with older versions of Python.
        #
        # So, use instead multiprocess. However, Python is very slow passing
        # data from/to multiple processes. Also, it may consume lots of memory
        # if the data to be shared is not small. So, we need to group workload
        # in chunks that are big enough to generate performance gains while
        # not being so big that would cause out-of-memory.

        num_refs = len(all_refs)
        print(f"Number of references to parse: {num_refs}", file=sys.stderr)

        # Clamp the worker count to [1, cpu_count].
        if not max_workers:
            max_workers = os.cpu_count()
        elif max_workers > os.cpu_count():
            max_workers = os.cpu_count()

        max_workers = max(max_workers, 1)

        # Never make chunks bigger than an even split of the workload
        # (ceiling division), and never smaller than one reference.
        max_chunk_size = int((num_refs + max_workers - 1) / max_workers)
        chunk_size = min(chunk_size, max_chunk_size)
        chunk_size = max(1, chunk_size)

        if max_workers > 1:
            executor = futures.ProcessPoolExecutor

            # Place references in a random order. This may help improving
            # performance, by mixing complex/simple expressions when creating
            # chunks
            shuffle(all_refs)
        else:
            # Python has a high overhead with processes. When there's just
            # one worker, it is faster to not create a new process.
            # Yet, User still deserves to have a progress print. So, use
            # python's "thread", which is actually a single process, using
            # an internal schedule to switch between tasks. No performance
            # gains for non-IO tasks, but still it can be quickly interrupted
            # from time to time to display progress.
            executor = futures.ThreadPoolExecutor

        not_found = []
        f_list = []
        with executor(max_workers=max_workers) as exe:
            # Submit one check_file() job per (non-empty) chunk.
            for refs in self.get_fileref(all_refs, chunk_size):
                if refs:
                    try:
                        f_list.append(exe.submit(self.check_file, refs, found))

                    except KeyboardInterrupt:
                        return

            total = len(f_list)

            if not total:
                if self.abi.re_string:
                    # NOTE(review): confirm self.abi exposes search_string
                    # (the raw user expression) alongside re_string.
                    print(f"No ABI symbol matches {self.abi.search_string}")
                else:
                    self.abi.log.warning("No ABI symbols found")
                return

            print(f"{len(f_list):6d} jobs queued on {max_workers} workers",
                  file=sys.stderr)

            # Poll for completed jobs once per second so progress can be
            # printed and ^C handled promptly.
            while f_list:
                try:
                    t = futures.wait(f_list, timeout=1,
                                     return_when=futures.FIRST_COMPLETED)

                    done = t[0]

                    for fut in done:
                        res_list = fut.result()

                        for res in res_list:
                            if not res["found"]:
                                not_found.append(res["fname"])
                            if res["msg"]:
                                print(res["msg"])

                        f_list.remove(fut)
                except KeyboardInterrupt:
                    return

                except RuntimeError as e:
                    self.abi.log.warning(f"Future: {e}")
                    break

                # Progress line, only on interactive terminals and only
                # when the elapsed-time string actually changed.
                if sys.stderr.isatty():
                    elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
                    if len(f_list) < total:
                        elapsed += f" ({total - len(f_list)}/{total} jobs completed). "
                    if elapsed != old_elapsed:
                        print(elapsed + "\r", end="", flush=True,
                              file=sys.stderr)
                        old_elapsed = elapsed

        # Final timing plus the sorted list of undocumented symbols.
        elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
        print(elapsed, file=sys.stderr)

        for f in sorted(not_found):
            print(f"{f} not found.")
|