Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 50 additions & 59 deletions src/ua_parser/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,27 +97,45 @@
return rules


def parse_item(item: str, all: list[str] | None) -> list[str]:
if item == '*':
assert all
return all
elif item.startswith('{'):
assert item.endswith('}')
return item[1:-1].split(',')
else:
return [item]

def rules_to_parsers(args: argparse.Namespace) -> "Iterator[tuple[str, str, int]]":
    """Expand the CLI ``selector`` arguments into unique parser configurations.

    Each selector is a ``parser:cache:size`` triple whose components may
    each be ``*`` (every known value), a ``{a,b,c}`` enumeration, or a
    single value (see ``parse_item``). Yields every distinct
    ``(parser, cache, size)`` triple at most once, in first-seen order.
    """
    # NOTE(review): `Iterator` is not imported at module level (CI flags
    # ruff F821 here, and the bare annotation would raise NameError when
    # the `def` is evaluated). The annotation is quoted so the module
    # imports cleanly; add `from collections.abc import Iterator` to the
    # file's imports to resolve the lint properly.
    seen: set[tuple[str, str, int]] = set()
    for selector in args.selector:
        p, c, s = selector.split(':')
        for triplet in (
            # a size of 0 means "uncached": normalize the cache name to
            # "none" so equivalent configurations deduplicate correctly
            (pp, 'none' if ss == 0 else cc, ss)
            for pp in parse_item(p, ['basic', 're2', 'regex', 'legacy'])
            # non-cacheable parsers only get the "none" pseudo-cache
            for cc in (parse_item(c, list(CACHES)) if CACHEABLE[pp] else ['none'])
            for ss in (map(int, parse_item(s, None)) if cc != 'none' else [0])
        ):
            if triplet not in seen:
                seen.add(triplet)
                yield triplet

def run_stdout(args: argparse.Namespace) -> None:
lines = list(args.file)
lines = list(map(sys.intern, args.file))
count = len(lines)
uniques = len(set(lines))
print(f"{args.file.name}: {count} lines, {uniques} unique ({uniques / count:.0%})")

rules = get_rules(args.bases, args.regexes)
parsers = list(rules_to_parsers(args))

# width of the parser label
w = math.ceil(
3
+ max(map(len, args.bases))
+ max(map(len, args.caches))
+ max(map(math.log10, args.cachesizes))
rules = get_rules([*{p for p, _, _ in parsers}], args.regexes)

w = max(
math.ceil(3 + len(p) + len(c) + (s and math.log10(s)))
for p, c, s in parsers
)
for p, c, n in (
(p, c, n)
for p in args.bases
for c in (args.caches if CACHEABLE[p] and args.cachesizes != [0] else ["none"])
for n in (args.cachesizes if c != "none" else [0])
):
for p, c, n in parsers:
name = "-".join(map(str, filter(None, (p, c != "none" and c, n))))
print(f"{name:{w}}", end=": ", flush=True)

Expand All @@ -131,24 +149,18 @@


def run_csv(args: argparse.Namespace) -> None:
lines = list(args.file)
lines = list(map(sys.intern, args.file))
LEN = len(lines) * 1000
rules = get_rules(args.bases, args.regexes)

parsers = [
(p, c, n)
for p in args.bases
for c in (args.caches if CACHEABLE[p] else ["none"])
for n in (args.cachesizes if c != "none" else [0])
]
parsers = list(rules_to_parsers(args))
if not parsers:
sys.exit("No parser selected")

rules = get_rules([*{p for p, _, _ in parsers}], args.regexes)
columns = {"size": ""}
columns.update(
(f"{p}-{c}", p if c == "none" else f"{p}-{c}")
for p in args.bases
for c in (args.caches if CACHEABLE[p] else ["none"])
for p, c, _ in parsers
)
w = csv.DictWriter(
sys.stdout,
Expand All @@ -171,11 +183,13 @@
# cache could be ignored as it should always be `"none"`
for parser, cache, _ in ps:
p = get_parser(parser, cache, 0, rules)
zeroes[f"{parser}-{cache}"] = run(p, lines) // LEN
zeroes[f"{parser}-{cache}"] = run(p, linges) // LEN

Check failure on line 186 in src/ua_parser/__main__.py

View workflow job for this annotation

GitHub Actions / checks

ruff (F821)

src/ua_parser/__main__.py:186:50: F821 Undefined name `linges`

# special cases for configurations where we can't have
# cachesize lines, write the template row out directly
if args.bases == ["legacy"] or args.caches == ["none"] or args.cachesizes == [0]:
if all(p == 'legacy' for p, _, _ in parsers)\
or all(c == 'none' for _, c, _ in parsers)\
or all(s == 0 for _, _, s in parsers):
zeroes["size"] = 0
w.writerow(zeroes)
return
Expand Down Expand Up @@ -288,7 +302,7 @@
self.count += 1
return r

lines = list(args.file)
lines = list(map(sys.intern, args.file))
total = len(lines)
uniques = len(set(lines))
print(total, "lines", uniques, "uniques")
Expand Down Expand Up @@ -343,7 +357,7 @@


def run_threaded(args: argparse.Namespace) -> None:
lines = list(args.file)
lines = list(map(sys.intern, args.file))
basic = BasicResolver(load_builtins())
resolvers: List[Tuple[str, Resolver]] = [
("locking-lru", CachingResolver(basic, caching.Lru(CACHESIZE))),
Expand Down Expand Up @@ -457,37 +471,14 @@
with a first cell of value 0.""",
)
bench.add_argument(
"--bases",
nargs="+",
choices=["basic", "re2", "regex", "legacy"],
default=["basic", "re2", "regex", "legacy"],
help="""Base resolvers to benchmark. `basic` is a linear search
through the regexes file, `re2` is a prefiltered regex set
implemented in C++, `regex` is a prefiltered regex set implemented
in Rust, `legacy` is the legacy API (essentially a basic resolver
with a clearing cache of fixed 200 entries, but less layered so
usually slightly faster than an equivalent basic-based resolver).""",
)
bench.add_argument(
"--caches",
nargs="+",
choices=list(CACHES),
default=list(CACHES),
help="""Cache implementations to test. `clearing` completely
clears the cache when full, `lru` uses a least-recently-eviction
policy. `lru` is not thread-safe, so `lru-threadsafe` adds a mutex
and measures *uncontended* locking overhead.""",
)
bench.add_argument(
"--cachesizes",
nargs="+",
type=int,
default=[10, 20, 50, 100, 200, 500, 1000, 2000, 5000],
help="""Caches are a classic way to trade memory for performances.
Different base resolvers and traffic patterns have different
benefits from caches, this option allows testing the benefits of
various cache sizes (and thus amounts of memory used) on the cache
strategies. """,
"selector",
nargs="*",
default=["*:*:{10,20,50,100,200,500,1000,2000,5000}"],
help=f"""A generative selector expression, composed of 3 parts: 1.
the parser (base), 2. the cache implementation ({', '.join(CACHES)})
and 3. the cache size. For parser and cache `*` is an alias for stands
in for "every value", a bracketed expression for an enumeration, and
the selector can be repeated to explicitly list each configuration """
)

hitrates = sub.add_parser(
Expand Down
Loading