package pkg

import (
	"bytes"
	"context"
	"errors"
	"os"
	"runtime"
	"slices"
	"strconv"
	"syscall"
	"time"

	"hakurei.app/container"
	"hakurei.app/container/check"
	"hakurei.app/container/fhs"
	"hakurei.app/container/std"
	"hakurei.app/message"
)

// AbsWork is the container pathname [CureContext.GetWorkDir] is mounted on.
var AbsWork = fhs.AbsRoot.Append("work/")

// ExecPath is a slice of [Artifact] together with the [check.Absolute]
// pathname they are made available on in the container.
type ExecPath struct {
	// Pathname in the container mount namespace.
	P *check.Absolute
	// Artifacts to mount on the pathname, must contain at least one [Artifact].
	// If there are multiple entries or W is true, P is set up as an overlay
	// mount, and entries of A must not implement [File].
	A []Artifact
	// Whether to make the mount point writable via an invisible tmpfs upperdir.
	W bool
}

// Path returns a populated [ExecPath].
func Path(pathname *check.Absolute, writable bool, a ...Artifact) ExecPath {
	return ExecPath{P: pathname, A: a, W: writable}
}

// MustPath is like [Path], but takes a string pathname via [check.MustAbs].
func MustPath(pathname string, writable bool, a ...Artifact) ExecPath {
	return ExecPath{P: check.MustAbs(pathname), A: a, W: writable}
}

// An execArtifact is an [Artifact] that produces its output by running a
// program that is part of another [Artifact] in a [container].
//
// Methods of execArtifact do not modify any struct field or underlying arrays
// referred to by slices.
type execArtifact struct {
	// Caller-supplied context.
	ctx context.Context

	// Caller-supplied inner mount points.
	paths []ExecPath
	// Caller-supplied logging facility, passed through to [container] and used
	// internally to produce verbose output.
	msg message.Msg
	// Number of [Artifact] to concurrently cure. A value of 0 or lower is
	// equivalent to the value returned by [runtime.NumCPU].
	cures int

	// Passed through to [container.Params].
	dir *check.Absolute
	// Passed through to [container.Params].
	env []string
	// Passed through to [container.Params].
	path *check.Absolute
	// Passed through to [container.Params].
	args []string
}

// execNetArtifact is like execArtifact but implements [KnownChecksum] and has
// its resulting container keep the host net namespace.
type execNetArtifact struct {
	checksum Checksum

	execArtifact
}

var _ KnownChecksum = new(execNetArtifact)

// Checksum returns the caller-supplied checksum.
func (a *execNetArtifact) Checksum() Checksum { return a.checksum }

// Kind returns the hardcoded [Kind] constant.
func (a *execNetArtifact) Kind() Kind { return KindExecNet }

// Params is [Checksum] concatenated with [KindExec] params.
func (a *execNetArtifact) Params() []byte {
	return slices.Concat(a.checksum[:], a.execArtifact.Params())
}

// Cure cures the [Artifact] by curing all its dependencies then running the
// container described by the caller. The container retains host networking.
func (a *execNetArtifact) Cure(c *CureContext) error {
	return a.cure(c, true)
}

// NewExec returns a new [Artifact] bounded by ctx, it cures all [Artifact]
// in paths at the specified maximum concurrent cures limit. Specified paths are
// bind mounted read-only in the specified order in the resulting container.
// A private instance of /proc and /dev is made available to the container.
//
// The working and temporary directories are both created and mounted writable
// on [AbsWork] and [fhs.AbsTmp] respectively. If one or more paths target
// [AbsWork], the final entry is set up as a writable overlay mount on /work for
// which the upperdir is the host side work directory. In this configuration,
// the W field is ignored, and the program must avoid causing whiteout files to
// be created. Cure fails if upperdir ends up with entries other than directory,
// regular or symlink.
//
// If checksum is non-nil, the resulting [Artifact] implements [KnownChecksum]
// and its container runs in the host net namespace.
//
// A cures value of 0 or lower is equivalent to the value returned by
// [runtime.NumCPU].
func NewExec(
	ctx context.Context,
	msg message.Msg,
	cures int,

	checksum *Checksum,

	dir *check.Absolute,
	env []string,
	path *check.Absolute,
	args []string,

	paths ...ExecPath,
) Artifact {
	a := execArtifact{
		ctx:   ctx,
		paths: paths,
		msg:   msg,
		cures: cures,
		dir:   dir,
		env:   env,
		path:  path,
		args:  args,
	}
	if checksum == nil {
		return &a
	}
	return &execNetArtifact{checksum: *checksum, execArtifact: a}
}

// Kind returns the hardcoded [Kind] constant.
func (a *execArtifact) Kind() Kind { return KindExec }

// Params returns the paths, working directory, environment, executable
// pathname and args of the container encoded into a single byte slice.
//
// Every env and args entry is terminated by a NUL byte. Without the
// terminator, entries run together and distinct values such as
// {"ab", "c"} and {"a", "bc"} encode identically, which would let two
// different commands share one identity. NUL is used because it cannot occur
// inside an environment or argument string handed to a program.
func (a *execArtifact) Params() []byte {
	var buf bytes.Buffer

	for _, p := range a.paths {
		// leading byte records whether the mount point is writable
		if p.W {
			buf.WriteByte(1)
		} else {
			buf.WriteByte(0)
		}

		// a nil P is rejected later by cure, a placeholder is encoded here
		// instead of dereferencing it
		if p.P != nil {
			buf.WriteString(p.P.String())
		} else {
			buf.WriteString("invalid P\x00")
		}
		buf.WriteByte(0)

		for _, d := range p.A {
			id := Ident(d)
			buf.Write(id[:])
		}
		buf.WriteByte(0)
	}
	buf.WriteByte(0)

	buf.WriteString(a.dir.String())
	buf.WriteByte(0)

	for _, e := range a.env {
		buf.WriteString(e)
		// terminate each entry so that entry boundaries are preserved
		buf.WriteByte(0)
	}
	buf.WriteByte(0)

	buf.WriteString(a.path.String())
	buf.WriteByte(0)

	for _, arg := range a.args {
		buf.WriteString(arg)
		// terminate each entry so that entry boundaries are preserved
		buf.WriteByte(0)
	}

	return buf.Bytes()
}

// Dependencies returns a slice of all artifacts collected from caller-supplied
// [ExecPath].
func (a *execArtifact) Dependencies() []Artifact {
	artifacts := make([][]Artifact, 0, len(a.paths))
	for _, p := range a.paths {
		artifacts = append(artifacts, p.A)
	}
	return slices.Concat(artifacts...)
}

// Cure cures the [Artifact] by curing all its dependencies then running the
// container described by the caller.
func (a *execArtifact) Cure(c *CureContext) (err error) {
	return a.cure(c, false)
}

const (
	// execWaitDelay is passed through to [container.Params].
	execWaitDelay = time.Nanosecond
)

// cure is like Cure but allows optional host net namespace. This is used for
// the [KnownChecksum] variant where networking is allowed.
func (a *execArtifact) cure(c *CureContext, hostNet bool) (err error) { cures := a.cures if cures < 1 { cures = runtime.NumCPU() } overlayWorkIndex := -1 type curePath struct { // Copied from ExecPath.P. dst *check.Absolute // Cured from ExecPath.A. src []*check.Absolute } paths := make([]curePath, len(a.paths)) for i, p := range a.paths { if p.P == nil || len(p.A) == 0 { return os.ErrInvalid } if p.P.Is(AbsWork) { overlayWorkIndex = i } paths[i].dst = p.P paths[i].src = make([]*check.Absolute, len(p.A)) } var artifactCount int for _, p := range a.paths { artifactCount += len(p.A) } if len(paths) > 0 { type cureArtifact struct { // Index of pending Artifact in paths. index [2]int // Pending artifact. a Artifact } ac := make(chan cureArtifact, artifactCount) for i, p := range a.paths { for j, d := range p.A { ac <- cureArtifact{[2]int{i, j}, d} } } type cureRes struct { // Index of result in paths. index [2]int // Cured pathname. pathname *check.Absolute // Error returned by c. err error } res := make(chan cureRes) for i := 0; i < cures; i++ { go func() { for d := range ac { // computing and encoding identifier is expensive if a.msg.IsVerbose() { a.msg.Verbosef("curing %s...", Encode(Ident(d.a))) } var cr cureRes cr.index = d.index cr.pathname, _, cr.err = c.Cure(d.a) res <- cr } }() } var count int errs := make([]error, 0, artifactCount) for cr := range res { count++ if cr.err != nil { errs = append(errs, cr.err) } else { paths[cr.index[0]].src[cr.index[1]] = cr.pathname } if count == artifactCount { break } } close(ac) if err = errors.Join(errs...); err != nil { return } } ctx, cancel := context.WithCancel(a.ctx) defer cancel() z := container.New(ctx, a.msg) z.WaitDelay = execWaitDelay z.SeccompPresets |= std.PresetStrict z.ParentPerm = 0700 z.HostNet = hostNet z.Hostname = "cure" if z.HostNet { z.Hostname = "cure-net" } z.Uid, z.Gid = (1<<10)-1, (1<<10)-1 if a.msg.IsVerbose() { z.Stdout, z.Stderr = os.Stdout, os.Stderr } z.Dir, z.Env, z.Path, z.Args = a.dir, 
a.env, a.path, a.args z.Grow(len(paths) + 4) temp, work := c.GetTempDir(), c.GetWorkDir() for i, b := range paths { if i == overlayWorkIndex { if err = os.MkdirAll(work.String(), 0700); err != nil { return } tempWork := temp.Append(".work") if err = os.MkdirAll(tempWork.String(), 0700); err != nil { return } z.Overlay( AbsWork, work, tempWork, b.src..., ) continue } if a.paths[i].W { tempUpper, tempWork := temp.Append( ".upper", strconv.Itoa(i), ), temp.Append( ".work", strconv.Itoa(i), ) if err = os.MkdirAll(tempUpper.String(), 0700); err != nil { return } if err = os.MkdirAll(tempWork.String(), 0700); err != nil { return } z.Overlay(b.dst, tempUpper, tempWork, b.src...) } else if len(b.src) == 1 { z.Bind(b.src[0], b.dst, 0) } else { z.OverlayReadonly(b.dst, b.src...) } } if overlayWorkIndex < 0 { z.Bind( work, AbsWork, std.BindWritable|std.BindEnsure, ) } z.Bind( c.GetTempDir(), fhs.AbsTmp, std.BindWritable|std.BindEnsure, ) z.Proc(fhs.AbsProc).Dev(fhs.AbsDev, true) if err = z.Start(); err != nil { return } if err = z.Serve(); err != nil { return } if err = z.Wait(); err != nil { return } // do not allow empty directories to succeed for { err = syscall.Rmdir(work.String()) if err != syscall.EINTR { break } } if err != nil && errors.Is(err, syscall.ENOTEMPTY) { err = nil } return }