• [gentoo-dev] [PATCH 1/2] esed.eclass: new eclass

    From Ionen Wolkens@21:1/5 to All on Tue May 31 13:30:01 2022
    Signed-off-by: Ionen Wolkens <ionen@gentoo.org>
    ---
    eclass/esed.eclass | 199 +++++++++++++++++++++++++++++++++++++++++++++
    1 file changed, 199 insertions(+)
    create mode 100644 eclass/esed.eclass

    diff --git a/eclass/esed.eclass b/eclass/esed.eclass
    new file mode 100644
    index 00000000000..69f546804c4
    --- /dev/null
    +++ b/eclass/esed.eclass
    @@ -0,0 +1,199 @@
    +# Copyright 2022 Gentoo Authors
    +# Distributed under the terms of the GNU General Public License v2
    +
    +# @ECLASS: esed.eclass
    +# @MAINTAINER:
    +# Ionen Wolkens <ionen@gentoo.org>
    +# @AUTHOR:
    +# Ionen Wolkens <ionen@gentoo.org>
    +# @SUPPORTED_EAPIS: 8
    +# @BLURB: sed(1) wrappers that die if expressions did not modify any files
    +# @EXAMPLE:
    +#
    +# @CODE
    +# esed 's/a/b/' src/file.c # -i is default, dies if 'a' does not become 'b'
    +#
    +# enewsed 's/a/b/' project.pc.in "${T}"/project.pc # stdin/out not supported +#
    +# esedfind . -type f -name '*.c' -esed 's/a/b/' # dies if zero files changed +#
    +# local esedexps=(
    +# # dies if /any/ of these did nothing, -e 's/a/b/' -e 's/c/d/
  • From Ionen Wolkens@21:1/5 to Ionen Wolkens on Tue May 31 15:50:01 2022
    On Tue, May 31, 2022 at 07:23:18AM -0400, Ionen Wolkens wrote:
    + if [[ ${contents[i]} != "${newcontents}" ]]; then
    + changed=1
    + [[ -v verbose ]] || break
    + fi
    +
    + [[ -v verbose ]] &&
    + diff -u --color --label="${files[i]}"{,} \
    + <(echo "${contents[i]}") <(echo "${newcontents}")

    self nitpick, didn't think much of these given optional but diff has
    nothing to say if contents == newcontents so will replace with:

    if [[ ${contents[i]} != "${newcontents}" ]]; then
    changed=1

    [[ -v verbose ]] || break

    diff -u --color --label="${files[i]}"{,} \
    <(echo "${contents[i]}") <(echo "${newcontents}")
    fi

    + [[ ${contents} != "${newcontents}" ]] && changed=1
    +
    + [[ -v verbose ]] &&
    + diff -u --color --label="${files[*]}" --label="${_esed_output}" \
    + <(echo "${contents}") <(echo "${newcontents}")

    and:

    if [[ ${contents} != "${newcontents}" ]]; then
    changed=1

    [[ -v verbose ]] &&
    diff -u --color --label="${files[*]}" --label="${_esed_output}" \
    <(echo "${contents}") <(echo "${newcontents}")
    fi

    (updated on github PR)

    --
    ionen

    -----BEGIN PGP SIGNATURE-----

    iQEzBAABCAAdFiEEx3SLh1HBoPy/yLVYskQGsLCsQzQFAmKWG+oACgkQskQGsLCs QzTrjAf+Ibt+73GjVOsrK+0TpYXUjzhxZxtC4es84BS2+KLLdotXG24q8YqpKh9+ BcJ19Xh2Z92WBz53lCRaen3sDSokICcJjJI/Z7qRPNVrHq07CgYR6LfK16lIpvTh bd372sl4DspKvbQJSGdvNxuycsyb2y639stSPguYOG2Uq92W9hoFPUYxaKq4CYpf PJ0mKRPGMKLtBjB7IU8UxFdrEig/fZwP02czH4XcmGYNITkXYwvEQ07Rm7TyKqls +fMtcyXazOZG3NI41mXUOXck1AXEQqto2xKFRq1iyN2pfZ3xyr8rxhipWRAQWRfY kkX73dKhBIsm8GXsgQCHiJP61/CKiQ==
    =LERh
    -----END PGP SIGNATURE-----

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)
  • From Ulrich Mueller@21:1/5 to All on Fri Jun 3 09:20:01 2022
    On Tue, 31 May 2022, Ionen Wolkens wrote:

    +# @FUNCTION: esed
    +# @USAGE: <sed-argument>...
    +# @DESCRIPTION:
    +# sed(1) wrapper that dies if the expression(s) did not modify any files.
    +# sed's -i/--in-place is forced, and so stdin/out cannot be used.

    This sounds like a simple enough task ...

    +esed() {
    + local -i i
    +
    + if [[ ${esedexps@a} =~ a ]]; then
    + # expression must be before -- but after the rest for e.g. -E to work
    + local -i pos
    + for ((pos=1; pos<=${#}; pos++)); do
    + [[ ${!pos} == -- ]] && break
    + done
    +
    + for ((i=0; i<${#esedexps[@]}; i++)); do
    + [[ ${esedexps[i]} ]] &&
    + esedexps= esed "${@:1:pos-1}" -e "${esedexps[i]}" "${@:pos}"
    + done
    +
    + unset esedexps
    + return 0
    + fi
    +
    + # Roughly attempt to find files in arguments by checking if it's a
    + # readable file (aka s/// is not a file) and does not start with -
    + # (unless after --), then store contents for comparing after sed.
    + local contents=() endopts files=()
    + for ((i=1; i<=${#}; i++)); do
    + if [[ ${!i} == -- && ! -v endopts ]]; then
    + endopts=1
    + elif [[ ${!i} =~ ^(-i|--in-place)$ && ! -v endopts ]]; then
    + # detect rushed sed -i -> esed -i, -i also silently breaks enewsed
    + die "passing ${!i} to ${FUNCNAME[0]} is invalid"
    + elif [[ ${!i} =~ ^(-f|--file)$ && ! -v endopts ]]; then
    + i+=1 # ignore script files
    + elif [[ ( ${!i} != -* || -v endopts ) && -f ${!i} && -r ${!i} ]]; then
    + files+=( "${!i}" )
    +
    + # eval 2>/dev/null to silence \0 warnings if sed binary files
    + eval 'contents+=( "$(<"${!i}")" )' 2>/dev/null \
    + || die "failed to read: ${!i}"
    + fi
    + done
    + (( ${#files[@]} )) || die "no readable files found from '${*}' arguments"
    +
    + local verbose
    + [[ ${ESED_VERBOSE} ]] && type diff &>/dev/null && verbose=1
    +
    + local changed newcontents
    + if [[ -v _esed_output ]]; then
    + [[ -v verbose ]] &&
    + einfo "${FUNCNAME[0]}: sed ${*} > ${_esed_output} ..." +
    + sed "${@}" > "${_esed_output}" \
    + || die "failed to run: sed ${*} > ${_esed_output}"
    +
    + eval 'newcontents=$(<${_esed_output})' 2>/dev/null \
    + || die "failed to read: ${_esed_output}"
    +
    + local IFS=$'\n' # sed concats with newline even if none at EOF + contents=${contents[*]}
    + unset IFS
    +
    + [[ ${contents} != "${newcontents}" ]] && changed=1
    +
    + [[ -v verbose ]] &&
    + diff -u --color --label="${files[*]}" --label="${_esed_output}" \
    + <(echo "${contents}") <(echo "${newcontents}") + else
    + [[ -v verbose ]] && einfo "${FUNCNAME[0]}: sed -i ${*} ..."
    +
    + sed -i "${@}" || die "failed to run: sed -i ${*}"
    +
    + for ((i=0; i<${#files[@]}; i++)); do
    + eval 'newcontents=$(<"${files[i]}")' 2>/dev/null \
    + || die "failed to read: ${files[i]}"
    +
    + if [[ ${contents[i]} != "${newcontents}" ]]; then
    + changed=1
    + [[ -v verbose ]] || break
    + fi
    +
    + [[ -v verbose ]] &&
    + diff -u --color --label="${files[i]}"{,} \
    + <(echo "${contents[i]}") <(echo "${newcontents}")
    + done
    + fi
    +
    + [[ -v changed ]] \
    + || die "no-op: ${FUNCNAME[0]} ${*}${_esed_command:+ (from: ${_esed_command})}"
    +}

    ... but then it's almost 100 lines of shell code, including very
    convoluted parsing of parameters. The code for detection whether a
    parameter is actually a file is a heuristic at best and looks rather
    brittle. Also, don't use eval because it is evil.

    So IMHO this isn't the right approach to the problem. If anything, make
    it a simple function with well defined arguments, e.g. exactly one
    expression and exactly one file, and call "sed -i" on it.

    Then again, we had something similar in the past ("dosed" as a package
    manager command) but later banned it for good reason.

    Detection whether the file contents changed also seems complicated.
    Why not compute a hash of the file before and after sed operated on it?
    md5sum or even cksum should be good enough, since security isn't an
    issue here.

    Ulrich

    -----BEGIN PGP SIGNATURE-----

    iQFDBAEBCAAtFiEEtDnZ1O9xIP68rzDbUYgzUIhBXi4FAmKZtL0PHHVsbUBnZW50 b28ub3JnAAoJEFGIM1CIQV4uZLUH/RxJWupkXjpyw8DcdEE1o8mlKsYqmdrbDujF buzsNzAMvnVsWhikCUYbINnwjWkXGCKbClXgWb1hDkNpjjWIi546crtShurDHnc2 eGUFyNo6pF8GWnWS645liRNoUrdRtoU+HuUck9rpjI2qtfornmuIelL2YOJ+8Txn 5uX982btSrVzps6ttblFlkL4VxnQHBBdtfK199KDBBf0bOTIUqR+N0/7gkfTTcGI RQnB5hcfClyMMutn5U8+s5LjnY/r1h4zey3mW+l3l3RW3tn6sZp0VvInMs0emo8L 7h4O4vd+mf3Ts945Y3MuCOrbBgncIKGThDD99B79Cye2dzVi/hs=
    =6d5n
    -----END PGP SIGNATURE-----

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)
  • From Ionen Wolkens@21:1/5 to Ulrich Mueller on Fri Jun 3 11:30:01 2022
    On Fri, Jun 03, 2022 at 09:14:05AM +0200, Ulrich Mueller wrote:
    On Tue, 31 May 2022, Ionen Wolkens wrote:

    +# @FUNCTION: esed
    +# @USAGE: <sed-argument>...
    +# @DESCRIPTION:
    +# sed(1) wrapper that dies if the expression(s) did not modify any files. +# sed's -i/--in-place is forced, and so stdin/out cannot be used.

    This sounds like a simple enough task ...

    +esed() {
    + local -i i
    +
    + if [[ ${esedexps@a} =~ a ]]; then
    + # expression must be before -- but after the rest for e.g. -E to work
    + local -i pos
    + for ((pos=1; pos<=${#}; pos++)); do
    + [[ ${!pos} == -- ]] && break
    + done
    +
    + for ((i=0; i<${#esedexps[@]}; i++)); do
    + [[ ${esedexps[i]} ]] &&
    + esedexps= esed "${@:1:pos-1}" -e "${esedexps[i]}" "${@:pos}"
    + done
    +
    + unset esedexps
    + return 0
    + fi
    +
    + # Roughly attempt to find files in arguments by checking if it's a
    + # readable file (aka s/// is not a file) and does not start with -
    + # (unless after --), then store contents for comparing after sed.
    + local contents=() endopts files=()
    + for ((i=1; i<=${#}; i++)); do
    + if [[ ${!i} == -- && ! -v endopts ]]; then
    + endopts=1
    + elif [[ ${!i} =~ ^(-i|--in-place)$ && ! -v endopts ]]; then
    + # detect rushed sed -i -> esed -i, -i also silently breaks enewsed
    + die "passing ${!i} to ${FUNCNAME[0]} is invalid"
    + elif [[ ${!i} =~ ^(-f|--file)$ && ! -v endopts ]]; then
    + i+=1 # ignore script files
    + elif [[ ( ${!i} != -* || -v endopts ) && -f ${!i} && -r ${!i} ]]; then
    + files+=( "${!i}" )
    +
    + # eval 2>/dev/null to silence \0 warnings if sed binary files
    + eval 'contents+=( "$(<"${!i}")" )' 2>/dev/null \
    + || die "failed to read: ${!i}"
    + fi
    + done
    + (( ${#files[@]} )) || die "no readable files found from '${*}' arguments"
    +
    + local verbose
    + [[ ${ESED_VERBOSE} ]] && type diff &>/dev/null && verbose=1
    +
    + local changed newcontents
    + if [[ -v _esed_output ]]; then
    + [[ -v verbose ]] &&
    + einfo "${FUNCNAME[0]}: sed ${*} > ${_esed_output} ..." +
    + sed "${@}" > "${_esed_output}" \
    + || die "failed to run: sed ${*} > ${_esed_output}"
    +
    + eval 'newcontents=$(<${_esed_output})' 2>/dev/null \
    + || die "failed to read: ${_esed_output}"
    +
    + local IFS=$'\n' # sed concats with newline even if none at EOF + contents=${contents[*]}
    + unset IFS
    +
    + [[ ${contents} != "${newcontents}" ]] && changed=1
    +
    + [[ -v verbose ]] &&
    + diff -u --color --label="${files[*]}" --label="${_esed_output}" \
    + <(echo "${contents}") <(echo "${newcontents}") + else
    + [[ -v verbose ]] && einfo "${FUNCNAME[0]}: sed -i ${*} ..."
    +
    + sed -i "${@}" || die "failed to run: sed -i ${*}"
    +
    + for ((i=0; i<${#files[@]}; i++)); do
    + eval 'newcontents=$(<"${files[i]}")' 2>/dev/null \
    + || die "failed to read: ${files[i]}"
    +
    + if [[ ${contents[i]} != "${newcontents}" ]]; then
    + changed=1
    + [[ -v verbose ]] || break
    + fi
    +
    + [[ -v verbose ]] &&
    + diff -u --color --label="${files[i]}"{,} \
    + <(echo "${contents[i]}") <(echo "${newcontents}")
    + done
    + fi
    +
    + [[ -v changed ]] \
    + || die "no-op: ${FUNCNAME[0]} ${*}${_esed_command:+ (from: ${_esed_command})}"
    +}

    ... but then it's almost 100 lines of shell code, including very
    convoluted parsing of parameters. The code for detection whether a
    parameter is actually a file is a heuristic at best and looks rather

    Even if it does pickup sed -e "s_im-a_real-file_", it's a semi
    non-issue given the file won't change before and after and it'll
    evaluate the others for difference instead.

    Where it gets a bit more complicated is enewsed given this is valid:
    sed s/// file1 file2 > file3
    Of course, could always ban multiple files on enewsed if felt really
    needed, albeit fwiw it's a limitation for a very unlikely scenario.

    "--" support is probably what adds the most complexity here, but
    will have a problem if any ebuild ever needs to sed a -file
    (-- is also useful in esedfind fwiw)

    Also I did write tests so the minor complexity hopefully doesn't
    lead to regressions.

    iwdevtools' qa-sed uses getopt(long) instead, but even that is not
    perfect given sed has special rules (plus wouldn't want to depend
    on || ( getopt util-linux ) here).

    brittle. Also, don't use eval because it is evil.

    This is static evals, they're just evaluating a flat string and it's
    no different than.. well just running it as-is except it works around
    a noise issue (the 2>/dev/null doesn't register without it).

    eval is mostly evil when it's:
    eval "${expanded_variable}=${what_is_this_even}"

    Seeing eval "var=\${not_expanded}" as "evil" makes no sense to me,
    it's a harmless warning silencer. I feel this is just overreaction
    to the eval keyword (also in other dev ML post).

    To reproduce the warning:
    $ var=$(printf "\0") # var=$(<file-with-null-bytes)
    bash: warning: command substitution: ignored null byte in input

    doesn't work: var=$(printf "\0") 2>/dev/null
    works: eval 'var=$(printf "\0")' 2>/dev/null


    So IMHO this isn't the right approach to the problem. If anything, make
    it a simple function with well defined arguments, e.g. exactly one
    expression and exactly one file, and call "sed -i" on it.

    I was hoping it wouldn't be a limitation to using sed, aka just add
    checks and take very little away. Would've even liked to do 100% compat
    like qa-sed (transparent stdin/out support), but that one is more
    complicated and drew the line.

    Lacking multiple files on esed (not enewsed) would be kinda annoying
    I think. esedfind also makes use of the fact it can take multiple
    files for when people need things like:

    find . -name '*.c' -exec sed -i s/__AVX__/__AVX2__/ {} + || die
    (esedfind would die if not one macro was replaced)

    I tried to use esed in several ebuilds and see what felt needed or
    is annoying without hoping people wouldn't mind using it over sed.

    Goal here is not to replace sed nor encourage using it, but just add
    a guard to ensure changes happened if it does get used. I've seen
    plenty of ebuilds with a sed that's been broken for >5-10 years.


    Then again, we had something similar in the past ("dosed" as a package manager command) but later banned it for good reason.

    Well, dosed /used/ to be kinda useful to avoid needing:

    mv file "${T}"/tmpfile || die
    sed s/// "${T}"/tmpfile > file || die

    It's only later that dosed started using -i itself and mostly didn't
    have a purpose anymore given ebuilds could now use -i too, see:

    https://bugs.gentoo.org/152017


    Detection whether the file contents changed also seems complicated.
    Why not compute a hash of the file before and after sed operated on it? md5sum or even cksum should be good enough, since security isn't an
    issue here.

    I don't think this would simplify beside now calling more external
    commands twice then comparing the outputs (capturing command output vs capturing file is not particularly different). May be faster on large
    amount of files but probably not meaningful for single/few files.

    This may make sed concatenation more complicated to handle too (cat
    doesn't concatenate the same way sed does wrt newlines at EOF). Not
    that couldn't ban that on enewsed, although it's more limitations
    over not much.



    All this aside, I'm not adamant about adding this. If not enough
    interest then I'll just drop the idea. May try to implement multiple
    expression support in iwdevtools' qa-sed instead, albeit being a
    separate QA thing that doesn't know the maintainer's intend (aka
    which seds are deterministic or not) and may also be missed, it
    doesn't really ensure ebuilds' seds are kept up to date.

    --
    ionen

    -----BEGIN PGP SIGNATURE-----

    iQEzBAABCAAdFiEEx3SLh1HBoPy/yLVYskQGsLCsQzQFAmKZ074ACgkQskQGsLCs QzSkNwf/T+51LqQCMOU8/NJlf+n7Ois3FKjAHEqZZAjfz2s1cNAD0/C6os6sfRWB JM7fCtE0xXeSRGey2jjS+kCM/yJWhaM+yiBhMZFZYuurv3OQ2O5IzMm+8rLBqqsd tx5s+3gPEN24CIwBwjNaeaRYMWicjGL4cFvlZfbSBQFCxn4pVmRDNzg+04+hUFX1 hvYaJoMlENmkR9/eRql1vJWs22P2YPOMT6+SGW2Weh1DC8OhRXXQh8jXB9hXUNMa rOT06KAB62KxifMpYEvp7U5h7sovtehUDy+fH+zrII4GZ6Rf1YtEwpnM28og7A5L cZBsep6/NfHHuOKkFPAHlR1DklaLKQ==
    =AwcK
    -----END PGP SIGNATURE-----

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)
  • From Ionen Wolkens@21:1/5 to Ionen Wolkens on Fri Jun 3 13:20:01 2022
    On Fri, Jun 03, 2022 at 05:26:22AM -0400, Ionen Wolkens wrote:
    On Fri, Jun 03, 2022 at 09:14:05AM +0200, Ulrich Mueller wrote:
    [...]
    brittle. Also, don't use eval because it is evil.

    This is static evals, they're just evaluating a flat string and it's
    no different than.. well just running it as-is except it works around
    a noise issue (the 2>/dev/null doesn't register without it).

    eval is mostly evil when it's:
    eval "${expanded_variable}=${what_is_this_even}"

    Seeing eval "var=\${not_expanded}" as "evil" makes no sense to me,
    it's a harmless warning silencer. I feel this is just overreaction
    to the eval keyword (also in other dev ML post).

    To reproduce the warning:
    $ var=$(printf "\0") # var=$(<file-with-null-bytes)
    bash: warning: command substitution: ignored null byte in input

    doesn't work: var=$(printf "\0") 2>/dev/null
    works: eval 'var=$(printf "\0")' 2>/dev/null

    Was experimenting for what else works just now.

    { var=$(printf "\0"); } 2>/dev/null

    I feel like I remember trying this before for qa-sed but don't remember
    why I didn't use it (unlike subshells it does keep the var readable).

    I still don't think that a flat eval was "evil" but if I have a way
    to avoid it I'll use that instead.

    --
    ionen

    -----BEGIN PGP SIGNATURE-----

    iQEzBAABCAAdFiEEx3SLh1HBoPy/yLVYskQGsLCsQzQFAmKZ7kMACgkQskQGsLCs QzSFVgf+NzUWeA4BpdcgxMm1NEJk+Lxvj9Bwqd4q/wORPy/v3o0Hj9lx2Zo2OsfJ pPxifhQBawgEVZZsJL7cjK+jD7Ve0mTlkBi2Ir/mWu8m6wnRcwHFAzbq+CoJEpJv TX4fZy1VydFZPMSnr9A8N0WGasqRb3RpXeN9NClOhHrkPupIp5USs9Sbqxo+Km/5 rO23pJzxblhvFFNhq2iwA6Zwp8fh9eTzxE4SVTzRwRUzXwt6/fFeUnGkkJajMHd4 SYBJ7IH9jGXFHQsLvywOG9ojQmYgEfgohvKoaweScMvpDsGw+FRoNVXHHKlAjEOf zbWHwdYqXjHydH9noie6K6sO9poPAQ==
    =MtjM
    -----END PGP SIGNATURE-----

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)