This module defines MSlice - basically a non-garbage-collected string - and various utility iterators & procs for it such as mSlices & msplit. There are basically 3 kinds of splitting: file-line-like, delimited by one byte, and delimited by a set of bytes (the latter two either repeatable or not). The latter two styles can also be bounded by a number of splits/number of outputs and accept either MSlice or string as inputs to produce the seq[MSlice]. This is a portability shim include file for when you want code to work without deprecation warnings under younger & older Nim compilers. Just include cligen/unsafeAdr at the global scope before using unsafeAddr.
Types
MSlice = object mem*: pointer len*: int
- Represent a memory slice, such as a delimited record in an MFile. Care is required to access MSlice data (think C mem* not str*). Use $ | toString to get a buffer for safer/more compatible work.
Sep = tuple[repeat: bool, chrDlm: char, setDlm: set[char], n: int]
SomeString = string | openArray[char] | MSlice
Consts
pow10: array[-308 .. 308, float] = [1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, 1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, 1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, 1e-281, 1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, 1e-272, 1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, 1e-263, 1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, 1e-254, 1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, 1e-245, 1e-244, 1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, 1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, 1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, 1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, 1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, 1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, 1e-191, 1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, 1e-182, 1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, 1e-173, 1e-172, 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, 1e-164, 1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, 1e-155, 1e-154, 1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, 1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, 1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, 1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, 1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83, 1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 
1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 10000000.0, 100000000.0, 1000000000.0, 10000000000.0, 100000000000.0, 1000000000000.0, 10000000000000.0, 100000000000000.0, 1000000000000000.0, 10000000000000000.0, 1e+17, 1e+18, 1e+19, 1e+20, 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40, 1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, 1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80, 1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100, 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109, 1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118, 1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125, 1e+126, 1e+127, 1e+128, 1e+129, 1e+130, 1e+131, 1e+132, 1e+133, 1e+134, 1e+135, 1e+136, 1e+137, 1e+138, 1e+139, 1e+140, 1e+141, 1e+142, 1e+143, 1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149, 1e+150, 1e+151, 1e+152, 1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159, 1e+160, 1e+161, 1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168, 1e+169, 1e+170, 1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177, 1e+178, 1e+179, 1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186, 1e+187, 1e+188, 1e+189, 1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195, 1e+196, 1e+197, 
1e+198, 1e+199, 1e+200, 1e+201, 1e+202, 1e+203, 1e+204, 1e+205, 1e+206, 1e+207, 1e+208, 1e+209, 1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215, 1e+216, 1e+217, 1e+218, 1e+219, 1e+220, 1e+221, 1e+222, 1e+223, 1e+224, 1e+225, 1e+226, 1e+227, 1e+228, 1e+229, 1e+230, 1e+231, 1e+232, 1e+233, 1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239, 1e+240, 1e+241, 1e+242, 1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249, 1e+250, 1e+251, 1e+252, 1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258, 1e+259, 1e+260, 1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269, 1e+270, 1e+271, 1e+272, 1e+273, 1e+274, 1e+275, 1e+276, 1e+277, 1e+278, 1e+279, 1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285, 1e+286, 1e+287, 1e+288, 1e+289, 1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296, 1e+297, 1e+298, 1e+299, 1e+300, 1e+301, 1e+302, 1e+303, 1e+304, 1e+305, 1e+306, 1e+307, 1e+308]
- pow10[i] = 10^i as a float
wspace = {' ', '\t', '\v', '\r', '\n', '\f'}
- == strutils.Whitespace
Procs
proc `+!`(p: pointer; i: int): pointer {.inline, ...raises: [], tags: [], forbids: [].}
proc `+!`(p: pointer; i: uint64): pointer {.inline, ...raises: [], tags: [], forbids: [].}
proc `-!`(p, q: pointer): int {.inline, ...raises: [], tags: [], forbids: [].}
proc clipAtFirst(s: var SomeString; ch: char)
- Shorten s to stop just before first occurrence of ch (e.g. '#') if any.
proc cmemchr(s: pointer; c: char; n: csize): pointer {.importc: "memchr", header: "<string.h>", ...raises: [], tags: [], forbids: [].}
proc cmemcmp(a, b: pointer; n: csize): cint {.importc: "memcmp", header: "<string.h>", noSideEffect, ...raises: [], tags: [], forbids: [].}
proc cmemcpy(a, b: pointer; n: csize): cint {.importc: "memcpy", header: "<string.h>", noSideEffect, ...raises: [], tags: [], forbids: [].}
proc cmemmem(h: pointer; nH: csize; s: pointer; nS: csize): pointer {. importc: "memmem", header: "string.h", ...raises: [], tags: [], forbids: [].}
proc cmemrchr(s: pointer; c: char; n: csize): pointer {.importc: "memrchr", header: "string.h", ...raises: [], tags: [], forbids: [].}
proc endsWith(s: MSlice; sfx: SomeString): bool
- Like strutils.endsWith.
proc find(s: MSlice; sub: SomeString): int
- Like strutils.find.
proc findNot(s: string; chars: set[char]; start: Natural = 0; last = 0): int {. ...raises: [], tags: [], forbids: [].}
-
Searches for NOT chars in s inside inclusive range start..last. If last is unspecified, it defaults to s.high (the last element).
If every character of s in that range is in chars (i.e. no character outside chars is found), -1 is returned. Otherwise the index returned is relative to s[0], not start. Use s[start..last].find for a start-origin index.
See also:
proc initSep(seps: string): Sep {....raises: [ValueError], tags: [], forbids: [].}
- Abstract single-string hybrid spec of maybe repeat-folding 1-char | maybe repeat-folding char set separation. Specifically, if any char of seps repeats, separators fold while value diversity implies char set separation. A magic value "white" = folding white space chars. E.g.: "," = strict CSV, "<SPC><SPC>" = folding spaces, " " = strict spaces. To simply skip maybe nested backslash-escaping, '0', 't', and 'n' mean NUL, TAB, NEWLINE.
proc initSplitr(seps: string): Sep {....deprecated: "use initSep", raises: [ValueError], tags: [], forbids: [].}
proc mem(s: openArray[char]): pointer {....raises: [], tags: [], forbids: [].}
- Make it easy to write a SomeString proc
proc mempbrk(s: pointer; accept: set[char]; n: csize): pointer {.inline, ...raises: [], tags: [], forbids: [].}
proc msplit(mslc: MSlice; fs: var seq[MSlice]; sep = ' '; eat = '\x00') {. ...raises: [], tags: [], forbids: [].}
- Use mslices iterator to populate fields seq[MSlice] fs.
proc msplit(s: MSlice; fs: var seq[MSlice]; sep = ' '; n = 0; repeat = false): int {. ...raises: [], tags: [], forbids: [].}
proc msplit(s: MSlice; fs: var seq[MSlice]; seps = wspace; n = 0; repeat = true): int {. ...raises: [], tags: [], forbids: [].}
proc msplit(s: MSlice; n = 0; seps = wspace; repeat = true): seq[MSlice] {. inline, ...raises: [], tags: [], forbids: [].}
proc msplit(s: MSlice; sep = ' '; n = 0; repeat = false): seq[MSlice] {.inline, ...raises: [], tags: [], forbids: [].}
proc msplit(s: string; fs: var seq[MSlice]; sep = ' '; n = 0; repeat = false): int {. ...raises: [], tags: [], forbids: [].}
- msplit w/reused fs[] & bounded cols n. discard msplit(..).
proc msplit(s: string; fs: var seq[MSlice]; seps = wspace; n = 0; repeat = true): int {. ...raises: [], tags: [], forbids: [].}
- Fast msplit with cached fs[] and single-char-of-set delimiter. n >= 2.
proc nextSlice(mslc, ms: var MSlice; sep = '\n'; eat = '\x00'): int {. ...raises: [], tags: [], forbids: [].}
-
Stores everything from the start of mslc up to, but excluding, the next sep in ms and advances the input slice mslc to after the next separator. Optionally removes eat-suffixed char from the end of the resulting slice.
Returns the number of advanced characters.
If no further sep is found in the input, the remaining slice is in ms and mslc will be considered empty.
If mslc is nil, ms remains unchanged.
This procedure is somewhat analogous to reading from a stream, in the sense that the input slice is drained.
proc nSplit(n: int; data: MSlice; sep = '\n'): seq[MSlice] {....raises: [], tags: [], forbids: [].}
- Split data into n roughly equal parts delimited by sep with any separator included in slices. result.len can be < n for small data sizes (in number of seps, not bytes). For IO efficiency, subdivision is done by bytes as a guess. So, this is fast, but accuracy is limited by statistical regularity.
proc parseFloat(s: MSlice | openArray[char]; eoNum: var int = doNotUse): float
proc split(s: Sep; line: MSlice; cols: var seq[MSlice]; n = 0) {.inline, ...raises: [], tags: [], forbids: [].}
proc splitr(s: string; fs: var seq[string]; sep = ' '; n = 0; repeat = false; sp: ptr seq[string] = nil): int {....raises: [], tags: [], forbids: [].}
- split w/reused fs[] & bounded cols n, maybe-repeatable sep.
proc splitr(s: string; fs: var seq[string]; seps = wspace; n = 0; repeat = true; sp: ptr seq[string] = nil): int {....raises: [], tags: [], forbids: [].}
- split w/reused fs[], bounded cols char-of-set sep which can maybe repeat.
proc splitr(s: string; sep: char; n = 0; repeat = false): seq[string] {.inline, ...raises: [], tags: [], forbids: [].}
- Like splitr(string, var seq[string], char, int), but returns the seq.
proc splitr(s: string; seps = wspace; n = 0; repeat = true): seq[string] {. inline, ...raises: [], tags: [], forbids: [].}
- Like splitr(string, var seq[string], set[char], int), but returns the seq.
proc startsWith(s: MSlice; pfx: SomeString): bool
- Like strutils.startsWith.
proc stripLeading(s: MSlice; chars = wspace): MSlice {....raises: [], tags: [], forbids: [].}
proc stripLeading(s: var MSlice; chars = wspace) {....raises: [], tags: [], forbids: [].}
proc stripTrailing(s: MSlice; chars = wspace): MSlice {....raises: [], tags: [], forbids: [].}
proc stripTrailing(s: var MSlice; chars = wspace) {....raises: [], tags: [], forbids: [].}
proc toCstr(p: pointer): cstring {.inline, ...raises: [], tags: [], forbids: [].}
- PROBABLY UNTERMINATED cstring. BE VERY CAREFUL.
Iterators
iterator frame(s: MSlice; sep: char; repeat = false; n = 0): TextFrame {. ...raises: [], tags: [], forbids: [].}
- Iterate over TextFrames (data|sep slices) in s delimited by a single char sep split <=n times. Repeats are folded if repeat.
iterator mSlices(mslc: MSlice; sep = ' '; eat = '\x00'): MSlice {....raises: [], tags: [], forbids: [].}
-
Iterate over {optionally eat-suffixed} sep-delimited slices in mslc. Delimiters are NOT part of returned slices. Pass eat='\0' to be strictly sep-delimited. A final, unterminated record is returned like any other. You can swap sep & eat to ignore any optional prefix except '\0'. This is similar to "lines parsing". E.g. usage:
import mfile; var count = 0  #Count initial '#' comment lines
for slice in mSlices(mopen("foo").toMSlice):
  if slice.len > 0 and slice[0] != '#': count.inc
Templates
template toOpenArrayChar(ms: MSlice): untyped
template toOpenArrayChar(s: string): untyped
- This is so you can call toOpenArrayChar on a SomeString parameter.