|
| 1 | +package pandoc |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "errors" |
| 6 | + "fmt" |
| 7 | + "io" |
| 8 | + "os" |
| 9 | + "os/exec" |
| 10 | + "path/filepath" |
| 11 | + "runtime" |
| 12 | + "strings" |
| 13 | + "time" |
| 14 | + |
| 15 | + "code.gitea.io/gitea/modules/log" |
| 16 | + "code.gitea.io/gitea/modules/pandoc/internal" |
| 17 | + "code.gitea.io/gitea/modules/process" |
| 18 | + "code.gitea.io/gitea/modules/util" |
| 19 | +) |
| 20 | + |
// Command describes a single invocation of an external program: the executable
// together with the arguments that will be passed to it.
type Command struct {
	// prog is the executable that gets started.
	prog string

	// args holds the arguments handed to prog, in order.
	args []string

	// parentContext is the context the running process is derived from.
	parentContext context.Context

	// globalArgsLength is the number of leading entries of args that LogString
	// collapses into a single "...global..." placeholder.
	globalArgsLength int

	// brokenArgs collects the values rejected by AddArgumentValues.
	// While it is non-empty the command refuses to run.
	brokenArgs []string
}
| 29 | + |
// RunOpts holds the settings for one run of a Command.
// When UseContextTimeout is set, Timeout is ignored.
type RunOpts struct {
	// Env is the environment of the started process.
	// When it is nil, the environment of the current process is used.
	Env []string

	// Timeout limits how long the command may run.
	// Values <= 0 select the package's default command timeout.
	Timeout time.Duration

	// UseContextTimeout makes the run rely on the Command's parent context
	// only, without applying Timeout.
	UseContextTimeout bool

	// Dir is the working directory of the pandoc command.
	Dir string

	// Stdout receives the standard output of the command.
	Stdout io.Writer

	// Stderr receives the standard error of the command.
	Stderr io.Writer

	// Stdin provides the input of the command.
	//
	// The caller is responsible for closing the writing side of Stdin so that Run can finish.
	// If it is not closed, Run may hang for a long time or forever, especially when the
	// command's context deadline differs from the caller's.
	//
	// Common mistakes:
	//   * `defer stdinWriter.Close()` followed by `cmd.Run()`:
	//     Run never returns if the command is killed by the timeout.
	//   * `go { case <- parentContext.Done(): stdinWriter.Close() }` with `cmd.Run(DefaultTimeout)`:
	//     the command is killed by the timeout, but Run does not return until stdinWriter.Close().
	//   * `go { if stdoutReader.Read() err != nil: stdinWriter.Close() }` with `cmd.Run()`:
	//     stdoutReader may never report an error if the command is killed by the timeout.
	//
	// Ideally the pandoc module itself should eventually take full control of stdin, which would
	// avoid these problems and make later refactoring easier.
	Stdin io.Reader

	// PipelineFunc, when set, is called after the command has been started. It receives the
	// command's context and its cancel function. If it returns an error, the command is
	// cancelled and that error is returned from Run.
	PipelineFunc func(context.Context, context.CancelFunc) error
}
| 53 | + |
// ErrBrokenCommand is returned when a command is run although at least one
// of the argument values added to it was rejected as unsafe.
var ErrBrokenCommand = errors.New("pandoc command is broken")
| 55 | + |
| 56 | +// Run runs the command with the RunOpts |
| 57 | +func (c *Command) Run(opts *RunOpts) error { |
| 58 | + return c.run(1, opts) |
| 59 | +} |
| 60 | + |
| 61 | +func (c *Command) run(skip int, opts *RunOpts) error { |
| 62 | + if len(c.brokenArgs) != 0 { |
| 63 | + log.Error("pandoc command is broken: %s, broken args: %s", c.LogString(), strings.Join(c.brokenArgs, " ")) |
| 64 | + return ErrBrokenCommand |
| 65 | + } |
| 66 | + if opts == nil { |
| 67 | + opts = &RunOpts{} |
| 68 | + } |
| 69 | + |
| 70 | + // We must not change the provided options |
| 71 | + timeout := opts.Timeout |
| 72 | + if timeout <= 0 { |
| 73 | + timeout = defaultCommandExecutionTimeout |
| 74 | + } |
| 75 | + |
| 76 | + var desc string |
| 77 | + callerInfo := util.CallerFuncName(1 /* util */ + 1 /* this */ + skip /* parent */) |
| 78 | + if pos := strings.LastIndex(callerInfo, "/"); pos >= 0 { |
| 79 | + callerInfo = callerInfo[pos+1:] |
| 80 | + } |
| 81 | + // these logs are for debugging purposes only, so no guarantee of correctness or stability |
| 82 | + desc = fmt.Sprintf("pandoc.Run(by:%s, repo:%s): %s", callerInfo, logArgSanitize(opts.Dir), c.LogString()) |
| 83 | + log.Debug("pandoc.Command: %s", desc) |
| 84 | + |
| 85 | + var ctx context.Context |
| 86 | + var cancel context.CancelFunc |
| 87 | + var finished context.CancelFunc |
| 88 | + |
| 89 | + if opts.UseContextTimeout { |
| 90 | + ctx, cancel, finished = process.GetManager().AddContext(c.parentContext, desc) |
| 91 | + } else { |
| 92 | + ctx, cancel, finished = process.GetManager().AddContextTimeout(c.parentContext, timeout, desc) |
| 93 | + } |
| 94 | + defer finished() |
| 95 | + |
| 96 | + startTime := time.Now() |
| 97 | + |
| 98 | + cmd := exec.CommandContext(ctx, c.prog, c.args...) |
| 99 | + if opts.Env == nil { |
| 100 | + cmd.Env = os.Environ() |
| 101 | + } else { |
| 102 | + cmd.Env = opts.Env |
| 103 | + } |
| 104 | + |
| 105 | + process.SetSysProcAttribute(cmd) |
| 106 | + cmd.Dir = opts.Dir |
| 107 | + cmd.Stdout = opts.Stdout |
| 108 | + cmd.Stderr = opts.Stderr |
| 109 | + cmd.Stdin = opts.Stdin |
| 110 | + if err := cmd.Start(); err != nil { |
| 111 | + return err |
| 112 | + } |
| 113 | + |
| 114 | + if opts.PipelineFunc != nil { |
| 115 | + err := opts.PipelineFunc(ctx, cancel) |
| 116 | + if err != nil { |
| 117 | + cancel() |
| 118 | + _ = cmd.Wait() |
| 119 | + return err |
| 120 | + } |
| 121 | + } |
| 122 | + |
| 123 | + err := cmd.Wait() |
| 124 | + elapsed := time.Since(startTime) |
| 125 | + if elapsed > time.Second { |
| 126 | + log.Debug("slow pandoc.Command.Run: %s (%s)", c, elapsed) |
| 127 | + } |
| 128 | + |
| 129 | + // We need to check if the context is canceled by the program on Windows. |
| 130 | + // This is because Windows does not have signal checking when terminating the process. |
| 131 | + // It always returns exit code 1, unlike Linux, which has many exit codes for signals. |
| 132 | + if runtime.GOOS == "windows" && |
| 133 | + err != nil && |
| 134 | + err.Error() == "" && |
| 135 | + cmd.ProcessState.ExitCode() == 1 && |
| 136 | + ctx.Err() == context.Canceled { |
| 137 | + return ctx.Err() |
| 138 | + } |
| 139 | + |
| 140 | + if err != nil && ctx.Err() != context.DeadlineExceeded { |
| 141 | + return err |
| 142 | + } |
| 143 | + |
| 144 | + return ctx.Err() |
| 145 | +} |
| 146 | + |
| 147 | +func (c *Command) LogString() string { |
| 148 | + // WARNING: this function is for debugging purposes only. It's much better than old code (which only joins args with space), |
| 149 | + // It's impossible to make a simple and 100% correct implementation of argument quoting for different platforms here. |
| 150 | + debugQuote := func(s string) string { |
| 151 | + if strings.ContainsAny(s, " `'\"\t\r\n") { |
| 152 | + return fmt.Sprintf("%q", s) |
| 153 | + } |
| 154 | + return s |
| 155 | + } |
| 156 | + a := make([]string, 0, len(c.args)+1) |
| 157 | + a = append(a, debugQuote(c.prog)) |
| 158 | + if c.globalArgsLength > 0 { |
| 159 | + a = append(a, "...global...") |
| 160 | + } |
| 161 | + for i := c.globalArgsLength; i < len(c.args); i++ { |
| 162 | + a = append(a, debugQuote(logArgSanitize(c.args[i]))) |
| 163 | + } |
| 164 | + return strings.Join(a, " ") |
| 165 | +} |
| 166 | + |
| 167 | +func logArgSanitize(arg string) string { |
| 168 | + if strings.Contains(arg, "://") && strings.Contains(arg, "@") { |
| 169 | + return util.SanitizeCredentialURLs(arg) |
| 170 | + } else if filepath.IsAbs(arg) { |
| 171 | + base := filepath.Base(arg) |
| 172 | + dir := filepath.Dir(arg) |
| 173 | + return filepath.Join(filepath.Base(dir), base) |
| 174 | + } |
| 175 | + return arg |
| 176 | +} |
| 177 | + |
| 178 | +// NewCommand creates and returns a new Pandoc Command based on given command and arguments. |
| 179 | +// Each argument should be safe to be trusted. User-provided arguments should be passed to AddArgumentValues instead. |
| 180 | +func NewCommand(ctx context.Context, args ...internal.CmdArg) *Command { |
| 181 | + // Make an explicit copy of globalCommandArgs, otherwise append might overwrite it |
| 182 | + cargs := make([]string, 0, len(args)) |
| 183 | + for _, arg := range args { |
| 184 | + cargs = append(cargs, string(arg)) |
| 185 | + } |
| 186 | + return &Command{ |
| 187 | + prog: PandocExecutable, |
| 188 | + args: cargs, |
| 189 | + parentContext: ctx, |
| 190 | + globalArgsLength: 0, |
| 191 | + } |
| 192 | +} |
| 193 | + |
| 194 | +// AddArguments adds new pandoc arguments (option/value) to the command. It only accepts string literals, or trusted CmdArg. |
| 195 | +// Type CmdArg is in the internal package, so it can not be used outside of this package directly, |
| 196 | +// it makes sure that user-provided arguments won't cause RCE risks. |
| 197 | +// User-provided arguments should be passed by other AddXxx functions |
| 198 | +func (c *Command) AddArguments(args ...internal.CmdArg) *Command { |
| 199 | + for _, arg := range args { |
| 200 | + c.args = append(c.args, string(arg)) |
| 201 | + } |
| 202 | + return c |
| 203 | +} |
| 204 | + |
| 205 | +// AddArgumentValues adds new dynamic argument values to the command. |
| 206 | +// The arguments may come from user input and can not be trusted, so no leading '-' is allowed to avoid passing options. |
| 207 | +func (c *Command) AddArgumentValues(args ...string) *Command { |
| 208 | + for _, arg := range args { |
| 209 | + if !isSafeArgumentValue(arg) { |
| 210 | + c.brokenArgs = append(c.brokenArgs, arg) |
| 211 | + } |
| 212 | + } |
| 213 | + if len(c.brokenArgs) != 0 { |
| 214 | + return c |
| 215 | + } |
| 216 | + c.args = append(c.args, args...) |
| 217 | + return c |
| 218 | +} |
| 219 | + |
// isSafeArgumentValue reports whether s can be used as a plain value, i.e. it does not
// start with '-' and so can not be taken for an option. The empty string is considered safe.
func isSafeArgumentValue(s string) bool {
	return !strings.HasPrefix(s, "-")
}
0 commit comments