cligen/strUt

Search:
Group by:

This is a portability shim include file for when you want code to work without deprecation warnings under younger & older Nim compilers. Just include cligen/unsafeAdr at the global scope before using unsafeAddr.

DragonBox is fast & accurate but sadly has no output format flexibility. Someday that may improve. For now the below routines maintain speed but fill the flexibility gap at a tiny loss of accuracy. This is not so bad if your mindset is that output (& parsing) is just another calculation on floats like transcendentals. 1 ULP is often considered ok for those. Rel.err. < ~2^-52 for me. Binary|C99 hex float are cheaper marshaling & JavaScript should just learn C99 hex floats already, especially since every number is float! ecvt/fcvt can be ~2X faster than DragonBox when asking for rounded results/fewer digits which for me is a common case.

Types

FloatCvtOptions = enum      ## The many options of binary -> string float conversion.
  fcPad0,                   ## Pad with '0' to the right (based upon precision `p`)
  fcCapital,                ## Use E|INF|NAN not default e|inf|nan
  fcPlus,                   ## Leading '+' for positive numbers, not default ""
  fcTrailDot,               ## Trailing '.' for round integers (to signify "FP")
  fcTrailDot0,              ## Trailing ".0" for round integers; overrides fcTrailDot
  fcExp23,                  ## 2|3 digit exp; 1e03 or 1e103 not 1e3; only for ecvt
  fcExp3,                   ## 3 digit exp; 1e003 not 1e3; only for ecvt
  fcExpPlus                  ## '+' on positive exponents; 1e+3 not 1e3; only for ecvt
The many options of binary -> string float conversion.
MacroCall = (Slice[int], Slice[int], Slice[int])
Id, Arg, whole Call
MacroCallX = (Slice[int], Slice[int], Slice[int], string)
Call & string

Vars

pmDfl = getEnv("LC_PM", " +- ")
how plus|minus is spelled

Consts

alphaNum = {'a'..'z', 'A'..'Z', '_', '0'..'9', '\x80'..'\xFF'}
pmUnicode = "±"
for re-assign/param passing ease
pmUnicodeSpaced = " ± "
for re-assign/param passing ease

Procs

proc `-`(a, b: openArray[string]): seq[string] {....raises: [], tags: [],
    forbids: [].}
All a[]s not in b (implemented efficiently with a HashSet).
proc add(result: var string; a: openArray[char]) {....raises: [], tags: [],
    forbids: [].}
proc commentStrip(s: string): string {....raises: [], tags: [], forbids: [].}
return string with any pre-'#' whitespace and any post-'#' text removed.
proc ecvt(s: var string; x: float; p = 17; opts = {fcPad0}) {.inline,
    ...raises: [], tags: [], forbids: [].}
ANSI C/Unix ecvt: float -> D.PPPPe+EE; Most conversion in int arithmetic. Accurate to ~52 bits with p=17.
proc ecvt2(man, exp: var string; x: float; p = 17; opts = {fcPad0};
           bumped: var bool = doNotUseB; expon: var int = doNotUseI) {.inline,
    ...raises: [], tags: [], forbids: [].}
ANSI C/Unix ecvt: float -> D.PPPPe+EE; Most conversion in int arithmetic. Accurate to ~52 bits with p=17.
proc endsWithI(s, suffix: string): bool {.noSideEffect, ...raises: [], tags: [],
    forbids: [].}
Case insensitive variant of endsWith.
proc fcvt(s: var string; x: float; p: int; opts = {fcPad0}) {.inline,
    ...raises: [], tags: [], forbids: [].}
ANSI C/Unix fcvt: float -> DDD.PPPP; Most conversion in integer arithmetic. Accurate to ~52 bits with p=17.
func find(s: openArray[char]; sub: char; start: Natural = 0; last = 0): int {.
    ...raises: [], tags: [], forbids: [].}
proc fmtUncertain(val, err: float; fmt0 = fmtUncertain0; fmtE = fmtUncertainE;
                  e0 = -2 .. 4; sigDigs = 2): string {....raises: [], tags: [],
    forbids: [].}
Driver for fmtUncertainRender which can do most desired formats.
proc fmtUncertainMerged(val, err: float; sigDigs = 2; e0 = -2 .. 4): string {.
    ...raises: [], tags: [], forbids: [].}
fmtUncertainMergedSci w/digit shift not e+NN for exp range near 0, e0.
proc fmtUncertainMergedSci(val, err: float; sigDigs = 2): string {....raises: [],
    tags: [], forbids: [].}
Format in "Particle Data Group" Style with uncertainty digits merged after the value and always in scientific-notation: val(err)e+NN with sigDigs of error digits. E.g. "12.34... +- 0.56..." => "1.234(56)e+01" (w/sigDigs=2).
proc fmtUncertainParts(val, err: float; sigDigs = 2): (string, string, string,
    string, int) {....raises: [], tags: [], forbids: [].}
This is a helper for nice formats. ffScientific format err to sigDigs. Then fmt val so that the final decimal place of both always aligns. E.g., (3141.5.., 45.6..) => ("3.142", "e+03", "4.6", "e+01"). Alignment means the difference in exponents should always be sigDigs. sigDigs=2 as per ISO standard JCGM 100:2008(E) §7.2.6 specific guidance available at: https://www.bipm.org/en/committees/jc/jcgm/publications (E)
proc fmtUncertainRender(val, err: float; fmt0, fmtE: string;
                        parse0, parseE: seq[MacroCall]; e0 = -2 .. 4;
                        sigDigs = 2): string {....raises: [], tags: [], forbids: [].}
Co-round val & err to sigDigs of err; fmtUncertainRender with fmt0 for valExp in exponent range near the origin, e0, else fmtE.
proc fmtUncertainRender(vm, ve, um, ue: string; exp: int; fmt: string;
                        parse: seq[MacroCall]): string {....raises: [], tags: [],
    forbids: [].}
Expand interpolates:
  • valMan valExp errMan errExp: val & err rounded to sigDigs places of err.
  • val0 err0: val & err with decimal mantissas shifted such that expon==0
  • errV: err shifted so decimal aligns with val0
  • errD: unpunctuated sigDigs digits of err
  • pm: a single strUt.pmDfl. You can change from "+-" to pmUnicode('±').
proc fmtUncertainSci(val, err: float; sigDigs = 2): string {....raises: [],
    tags: [], forbids: [].}
Format co-rounded (val${pm}err)e+NN with err to sigDigs.
proc fmtUncertainVal(val, err: float; sigDigs = 2; e0 = -2 .. 4): string {.
    ...raises: [], tags: [], forbids: [].}
Format like fmtUncertain default, but only the value part.
proc hashCB(x: openArray[char]): uint64 {....raises: [], tags: [], forbids: [].}
Hash inspired by Fletcher1982-Arithmetic Checksum. Please credit him&me! I adapted to use salt, 8B digits (filled native-Endian) & accumulators truncated via unsigned long overflow, and R=3 (in paper's terminology). This passes few SMHasher tests, but is ok in practice, fast & portable.
proc hashWY(x: openArray[char]): uint64 {....raises: [], tags: [], forbids: [].}
Wang Yi's nice hash passing SMHasher tests; Unoptimized for small keys
proc idIsLiteral(id: Slice[int]): bool {....raises: [], tags: [], forbids: [].}
Test if a MacroCall[0] (id) refers to a literal text "call".
proc joins[T](sst: seq[seq[T]]; delim: T): seq[T]
Return joined seq of sub-seqs splittable by delim. E.g. @[@["a"], @["b", "c"]].joins("--") == @["a", "--", "b", "c"].
proc joinS(sep = " "; a: varargs[string, `$`]): string {....raises: [], tags: [],
    forbids: [].}
Join after $. S is intended to suggest $. echo " ".joinS(a, b, c) is more ceremony than print but may also have broader utility.
proc split[T](st: seq[T]; delim: T): seq[seq[T]]
Return seq of sub-seqs split by delim. Delim itself is not included. E.g. @["a", "--", "b", "c"].split "--" == @[@["a"], @["b", "c"]].
proc startsWithI(s, prefix: string): bool {.noSideEffect, ...raises: [], tags: [],
    forbids: [].}
Case insensitive variant of startsWith.
proc tmplParsed(fmt: openArray[char]; meta = '$'; ids = alphaNum): seq[MacroCall] {.
    ...raises: [], tags: [], forbids: [].}
Pre-process a format string template into a seq[MacroCall].
proc tmplParses(fmt: openArray[char]; meta = '$'; ids = alphaNum): seq[
    MacroCallX] {....raises: [], tags: [], forbids: [].}
Pre-process a format string template into a seq[MacroCallX].
proc toString(s: openArray[char]): string {....raises: [], tags: [], forbids: [].}
func uint64toDecimal(res: var openArray[char]; x: uint64): int {....raises: [],
    tags: [], forbids: [].}
Flexible oA[char] inp; Fast 3B @a time out; Answer is res[result..^1].

Iterators

iterator tmplParse(fmt: openArray[char]; meta = '$'; ids = alphaNum): MacroCall {.
    ...raises: [], tags: [], forbids: [].}

A text template parser converting fmt into macro calls encoded as fmt-relative slices. Callers resolve any calls. Macro call syntax is $ID, %[ID], ${(ID)ARG} embedded within literal text. $ | % | .. is a self-escaping meta char. This generalizes most self-escaping format string interpolation styles. The idea is to make it easy to write std/strutils.%, C/Python printf-like string interpolation with one standard, but flexible syntax.

Specifically, if [X] => OPTIONAL X & <Y> => NEEDED Y, then a call is:

  • M[B][Q]<ID>[next q post Q][ARGUMENT STRING][next b post B]

where

  • M is the so-called self-escaping warning meta char, like '$', '%', ..
  • B is an optional opening brace | quote (any NON-ids)
  • b=other[B] is closing pair (other[c]==c except for ([{<>}]))
  • Q is a macro id brace|quote (IF non-ids); q=other[Q] closes

No B => calls stop at the next NON-ids char (e.g., : in $ID:x) and the macro gets an empty ARG. No Q => ARG="" and whole post-M fragment is ID (e.g., $FOO, $'WITH SPACES'). Self-escaping means MM->M (only outside calls). fmt writers must pick non-colliding brace | quotes.

RETURN VALUE is intended to be assigned to an (id, arg, call) tuple where fmt[id|arg|call] can extract that thing from fmt. id.idIsLiteral is true for a special macro meaning fmt[arg] is trailing literal text. A fully worked out example with tests is examples/tmpl.nim.