// Origin: slidge-whatsapp/slidge_whatsapp/attachment.go

package whatsapp
import (
// Standard library.
"bufio"
"bytes"
"errors"
"fmt"
"image"
"image/jpeg"
_ "image/png"
"math"
"os"
"os/exec"
"path"
"strconv"
"strings"
"time"
// Third-party packages.
"github.com/h2non/filetype"
_ "golang.org/x/image/webp"
)
// ffmpegCommand is the full path to the FFmpeg binary, as resolved from the search path at package
// initialization; the lookup error is discarded, and code in this file treats an empty value as
// "FFmpeg not available".
var ffmpegCommand, _ = exec.LookPath("ffmpeg")

// ffmpegDefaultArgs are the arguments placed in front of every FFmpeg invocation used for
// converting media to supported types.
var ffmpegDefaultArgs = []string{"-v", "error", "-y"}
// ffprobeCommand is the full path to the FFprobe binary (as provided by FFmpeg), resolved from the
// search path at package initialization; the lookup error is discarded, and code in this file
// treats an empty value as "FFprobe not available".
var ffprobeCommand, _ = exec.LookPath("ffprobe")

// ffprobeDefaultArgs are the arguments placed in front of every FFprobe invocation used for getting
// media metadata (e.g. duration, waveforms, etc.)
var ffprobeDefaultArgs = []string{"-v", "error", "-of", "csv=nokey=0:print_section=0"}
// voiceMessageMIME is the MIME type used by voice messages on WhatsApp.
const voiceMessageMIME = "audio/ogg; codecs=opus"

// animatedImageMIME is the MIME type used by animated images on WhatsApp.
const animatedImageMIME = "image/gif"
// A convertAttachmentFunc is a function that converts the given attachment to another format,
// according to the set of string arguments given, and reports failure through its error result.
// The implementations in this file (convertImage and convertAudioVideo) rewrite the file at the
// attachment's path in place.
type convertAttachmentFunc func(*Attachment, ...string) error
// convertAttachmentOptions describes how attachments of one source type are converted: the MIME
// type the attachment is given after a successful conversion, the function performing the
// conversion, and the arguments that function is called with.
type convertAttachmentOptions struct {
mime string // The destination MIME type for the converted media.
call convertAttachmentFunc // The function to use for converting media.
args []string // The arguments to pass to the conversion function.
}
// Attachment conversion specifications, one per kind of media message on WhatsApp.

// imageMessageOptions holds the MIME type and conversion function used by image messages on
// WhatsApp; no conversion arguments are set.
var imageMessageOptions = convertAttachmentOptions{
	call: convertImage,
	mime: "image/jpeg",
}

// voiceMessageOptions holds the MIME type and FFmpeg arguments used by voice messages on WhatsApp.
var voiceMessageOptions = convertAttachmentOptions{
	call: convertAudioVideo,
	mime: voiceMessageMIME,
	args: []string{
		// Convert to Ogg with Opus.
		"-f", "ogg",
		"-c:a", "libopus",
		// Convert to mono.
		"-ac", "1",
		// Use specific sample-rate of 48000hz.
		"-ar", "48000",
		// Use relatively reasonable bit-rate of 64kBit/s.
		"-b:a", "64k",
		// Remove all metadata from output.
		"-map_metadata", "-1",
	},
}

// videoMessageOptions holds the MIME type and FFmpeg arguments used by video messages on WhatsApp.
var videoMessageOptions = convertAttachmentOptions{
	call: convertAudioVideo,
	mime: "video/mp4",
	args: []string{
		// Convert to mp4 with h264.
		"-f", "mp4",
		"-c:v", "libx264",
		// Use YUV 4:2:0 chroma subsampling.
		"-pix_fmt", "yuv420p",
		// Use Baseline profile for better compatibility.
		"-profile:v", "baseline",
		// Ensure compatibility with older devices.
		"-level", "3.0",
		// Pad width and height up to the next multiple of 2.
		"-vf", "pad=ceil(iw/2)*2:ceil(ih/2)*2",
		// Use 25fps, with an index frame every 50 frames.
		"-r", "25",
		"-g", "50",
		// Re-encode audio to AAC, if any.
		// NOTE(review): "-r:a" is FFmpeg's -r option with an audio stream specifier; if a
		// 44100hz audio sample-rate is what's intended, that is usually spelled "-ar" — confirm.
		"-c:a", "aac",
		"-b:a", "160k",
		"-r:a", "44100",
		// Use Faststart for quicker rendering.
		"-movflags", "+faststart",
		// Overwrite existing output file, where this exists (also present in ffmpegDefaultArgs).
		"-y",
	},
}
// convertAttachmentTypes maps a source MIME type to the options used for converting attachments
// of that type. It is consulted by convertAttachment with the MIME type detected from the file
// itself; MIME types without an entry here are left unconverted.
var convertAttachmentTypes = map[string]convertAttachmentOptions{
"image/png": imageMessageOptions,
"image/webp": imageMessageOptions,
"audio/mp4": voiceMessageOptions,
"audio/aac": voiceMessageOptions,
"audio/ogg; codecs=opus": voiceMessageOptions,
"video/mp4": videoMessageOptions,
"video/webm": videoMessageOptions,
// GIF images are converted to video, re-using the video conversion function and MIME type, with
// an additional frame-rate argument placed in front of a copy of the video arguments.
"image/gif": {
mime: videoMessageOptions.mime,
call: videoMessageOptions.call,
args: append([]string{
"-r", "10", // Assume 10fps GIF speed.
// NOTE(review): convertAudioVideo places these arguments after "-i <path>", and the video
// arguments appended below contain "-r", "25" as well; presumably the later value is the one
// FFmpeg applies — confirm that the 10fps setting has the intended effect.
}, videoMessageOptions.args...),
},
}
// convertAttachment attempts to process a given attachment from a less-supported type to a
// canonically supported one; for example, from `image/png` to `image/jpeg`.
//
// The conversion to apply is chosen by the MIME type detected from the file contents (not the MIME
// type declared on the attachment), refined with stream metadata for audio and video containers.
// The declared MIME type is only filled in from the detected one when it is empty or
// `application/octet-stream`. If the detected type is unknown or has no entry in
// convertAttachmentTypes, the attachment is left as-is and nil is returned. A failed conversion is
// returned as an error; a successful one updates the attachment's MIME type to the target type.
func convertAttachment(attach *Attachment) error {
	var sourceMIME string
	if kind, _ := filetype.MatchFile(attach.Path); kind != filetype.Unknown {
		sourceMIME = kind.MIME.Value
		if attach.MIME == "" || attach.MIME == "application/octet-stream" {
			attach.MIME = sourceMIME
		}
	}

	// MP4 audio files are matched as `audio/m4a` which is not a valid MIME, correct this to
	// `audio/mp4`, which is what WhatsApp requires as well.
	if sourceMIME == "audio/m4a" {
		sourceMIME = "audio/mp4"
	}

	switch sourceMIME {
	case "audio/mp4", "audio/ogg":
		if populateAttachmentMetadata(attach) == nil {
			if codec := attach.meta.codec; codec == "alac" {
				// Don't attempt to process lossless files at all, as it's assumed that the
				// sender wants to retain these characteristics. Since WhatsApp will try (and
				// likely fail) to process this as an audio message anyways, set a unique MIME
				// type.
				attach.MIME = "application/octet-stream"
				return nil
			} else if codec == "opus" {
				sourceMIME += "; codecs=" + codec
			}
		}
	case "video/mp4":
		// Files detected as video are sometimes audio-only; treat a file with no video
		// dimensions but a valid sample-rate and duration as MP4 audio instead.
		if populateAttachmentMetadata(attach) == nil {
			hasVideo := attach.meta.width != 0 || attach.meta.height != 0
			hasAudio := attach.meta.sampleRate > 0 && attach.meta.duration > 0
			if !hasVideo && hasAudio {
				sourceMIME = "audio/mp4"
			}
		}
	}

	// Convert attachment between file-types, if source MIME matches the known list of
	// convertable types.
	options, known := convertAttachmentTypes[sourceMIME]
	if !known {
		return nil
	}
	if err := options.call(attach, options.args...); err != nil {
		return fmt.Errorf("conversion from %s to %s failed: %s", attach.MIME, options.mime, err)
	}

	attach.MIME = options.mime
	return nil
}
// maxImageSize is the maximum image attachment size we'll attempt to process in any way, in bytes
// (10MiB).
const maxImageSize = 10 << 20

// maxAudioVideoSize is the maximum audio/video attachment size we'll attempt to process in any
// way, in bytes (20MiB).
const maxAudioVideoSize = 20 << 20
// convertImage processes the given Attachment, assumed to be an image of a format with a
// registered decoder, and converts it to a JPEG-encoded image in-place, i.e. the file at the
// attachment's path is overwritten with the JPEG data.
//
// The arguments given are accepted for compatibility with convertAttachmentFunc, but are not
// processed currently. An error is returned if the file cannot be inspected or opened, exceeds
// maxImageSize, cannot be decoded, or cannot be fully written back.
func convertImage(attach *Attachment, args ...string) error {
	stat, err := os.Stat(attach.Path)
	if err != nil {
		return err
	}
	if s := stat.Size(); s > maxImageSize {
		return fmt.Errorf("attachment size %d exceeds maximum of %d", s, maxImageSize)
	}

	// Open read-write so that an unwritable file is rejected before any decoding work is done.
	in, err := os.OpenFile(attach.Path, os.O_RDWR, 0)
	if err != nil {
		return err
	}

	img, _, err := image.Decode(in)
	in.Close()
	if err != nil {
		return err
	}

	// The decoded image is held in memory at this point; the source file is truncated and
	// re-written with the JPEG encoding.
	out, err := os.Create(attach.Path)
	if err != nil {
		return err
	}

	if err = jpeg.Encode(out, img, nil); err != nil {
		out.Close()
		return err
	}

	// Closing may surface deferred write errors, so its result is part of the outcome.
	return out.Close()
}
// convertAudioVideo processes the given Attachment, assumed to be an audio or video file of a
// supported format, by running FFmpeg with the arguments given, and replaces the file at the
// attachment's path with the result.
//
// The FFmpeg command-line is built as: the default arguments, "-i" with the attachment path, the
// arguments given (which therefore act as output options), and finally the output path. Output is
// written to a temporary file next to the source, which is renamed over the source on success and
// removed on any failure.
//
// An error is returned if FFmpeg is not available, the file cannot be inspected or exceeds
// maxAudioVideoSize, the temporary file cannot be created, FFmpeg fails (in which case its
// standard error output is included in the message), or the final rename fails.
func convertAudioVideo(attach *Attachment, args ...string) error {
	if ffmpegCommand == "" {
		return fmt.Errorf("FFmpeg command not found")
	}

	stat, err := os.Stat(attach.Path)
	if err != nil {
		return err
	}
	if s := stat.Size(); s > maxAudioVideoSize {
		return fmt.Errorf("attachment size %d exceeds maximum of %d", s, maxAudioVideoSize)
	}

	tmp, err := os.CreateTemp(path.Dir(attach.Path), path.Base(attach.Path)+".*")
	if err != nil {
		return fmt.Errorf("failed creating temporary file: %w", err)
	}

	// Ensure the temporary file never outlives a failed conversion.
	tmpPath, done := tmp.Name(), false
	defer func() {
		if !done {
			os.Remove(tmpPath)
		}
	}()

	// FFmpeg opens the output by name, so our own handle is not needed.
	if err := tmp.Close(); err != nil {
		return fmt.Errorf("failed creating temporary file: %w", err)
	}

	// Build the command-line in a freshly allocated slice. Appending to `args` or to the default
	// arguments directly could write into a backing array shared with package-level option values
	// (and thus with concurrent conversions), mixing up output paths between calls.
	cmdArgs := make([]string, 0, len(ffmpegDefaultArgs)+len(args)+3)
	cmdArgs = append(cmdArgs, ffmpegDefaultArgs...)
	cmdArgs = append(cmdArgs, "-i", attach.Path)
	cmdArgs = append(cmdArgs, args...)
	cmdArgs = append(cmdArgs, tmpPath)

	if _, err := exec.Command(ffmpegCommand, cmdArgs...).Output(); err != nil {
		var exitErr *exec.ExitError
		if errors.As(err, &exitErr) {
			return fmt.Errorf("%s: %s", exitErr.Error(), bytes.TrimSpace(exitErr.Stderr))
		}
		return err
	}

	if err := os.Rename(tmpPath, attach.Path); err != nil {
		return fmt.Errorf("failed cleaning up temporary file: %w", err)
	}

	done = true
	return nil
}
// getAttachmentThumbnail returns a static thumbnail in JPEG format from the given Attachment,
// assumed to point to a video file. The attachment itself is not modified: its contents are
// copied to a temporary file, which is converted to a single 500-pixel-wide MJPEG frame, read
// back, and removed again. If no thumbnail could be generated for any reason, a nil result and
// the corresponding error are returned.
func getAttachmentThumbnail(attach *Attachment) ([]byte, error) {
	source, err := os.ReadFile(attach.Path)
	if err != nil {
		return nil, fmt.Errorf("failed reading attachment %s: %w", attach.Path, err)
	}

	scratch, err := createTempFile(source)
	if err != nil {
		return nil, err
	}
	defer os.Remove(scratch)

	// Scale to a width of 500 pixels (keeping aspect ratio) and emit only the first frame.
	frameArgs := []string{"-f", "mjpeg", "-vf", "scale=500:-1", "-qscale:v", "5", "-frames:v", "1"}
	if err = convertAudioVideo(&Attachment{Path: scratch}, frameArgs...); err != nil {
		return nil, err
	}

	thumbnail, err := os.ReadFile(scratch)
	if err != nil {
		return nil, fmt.Errorf("failed reading converted file: %w", err)
	}

	return thumbnail, nil
}
// attachmentMetadata represents secondary information for a given audio/video file, as filled in
// by populateAttachmentMetadata from FFprobe's per-stream output. This information is gathered on
// a best-effort basis, and thus may be missing (i.e. left at the zero value) even for otherwise
// valid media files. Where several streams report the same entry, the value reported last is the
// one kept.
type attachmentMetadata struct {
codec string // The codec name reported for the attachment's stream(s).
width int // The reported width of the video stream; 0 if none was reported.
height int // The reported height of the video stream; 0 if none was reported.
sampleRate int // The reported sample rate of the audio stream; 0 if none was reported.
duration time.Duration // The reported duration of the audio/video stream.
}
// populateAttachmentMetadata calculates and populates secondary information for the given
// audio/video attachment, by running FFprobe against the file and parsing its `key=value` output.
// Metadata is gathered on a best-effort basis, and may be missing: values that fail to parse are
// skipped silently; see the documentation for [attachmentMetadata] for more information.
//
// The attachment's metadata is only replaced when FFprobe ran to completion and its output was
// read without error; in any other case an error is returned and the attachment is left untouched.
func populateAttachmentMetadata(attach *Attachment) error {
	if ffprobeCommand == "" {
		return fmt.Errorf("FFprobe command not found")
	}

	info, err := os.Stat(attach.Path)
	if err != nil {
		return err
	}
	if size := info.Size(); size > maxAudioVideoSize {
		return fmt.Errorf("attachment size %d exceeds maximum of %d", size, maxAudioVideoSize)
	}

	probeArgs := append(ffprobeDefaultArgs,
		"-i", attach.Path,
		"-show_entries", "stream=codec_name,width,height,sample_rate,duration",
	)

	probe := exec.Command(ffprobeCommand, probeArgs...)
	output, err := probe.StdoutPipe()
	if err != nil {
		return fmt.Errorf("failed to set up standard output: %s", err)
	}
	if err = probe.Start(); err != nil {
		return fmt.Errorf("failed to initialize command: %s", err)
	}

	var parsed attachmentMetadata

	// Destination fields for the entries that are plain integers.
	intFields := map[string]*int{
		"width":       &parsed.width,
		"height":      &parsed.height,
		"sample_rate": &parsed.sampleRate,
	}

	// Each output line is a comma-separated list of `key=value` entries.
	lines := bufio.NewScanner(output)
	for lines.Scan() {
		for _, entry := range strings.Split(lines.Text(), ",") {
			key, value, _ := strings.Cut(strings.TrimSpace(entry), "=")
			switch key {
			case "codec_name":
				parsed.codec = value
			case "duration":
				// Durations are given in (fractional) seconds.
				if secs, err := strconv.ParseFloat(value, 64); err == nil {
					parsed.duration = time.Duration(secs * float64(time.Second))
				}
			default:
				if field, ok := intFields[key]; ok {
					if n, err := strconv.Atoi(value); err == nil {
						*field = n
					}
				}
			}
		}
	}

	// The pipe must be drained before waiting on the command; only then are errors checked.
	if err = probe.Wait(); err != nil {
		return fmt.Errorf("failed to wait for command to complete: %s", err)
	}
	if err = lines.Err(); err != nil {
		return fmt.Errorf("failed scanning command output: %s", err)
	}

	attach.meta = parsed
	return nil
}
// maxWaveformSamples is the maximum number of samples to return in media waveforms.
const maxWaveformSamples = 64
// getAttachmentWaveform returns the computed waveform for the attachment given, as a series of
// exactly maxWaveformSamples (64) numbers ranging from 0 to 100. If fewer samples could be
// computed, the remainder of the result is zero.
//
// The attachment's metadata must carry a non-zero sample-rate and duration (see
// populateAttachmentMetadata), as these determine how many audio samples make up each waveform
// entry. Any error in computing the waveform (FFprobe missing, file unreadable or larger than
// maxAudioVideoSize, missing metadata, FFprobe failing, unreadable output) has this function
// return a nil result and the error.
func getAttachmentWaveform(attach *Attachment) ([]byte, error) {
	if ffprobeCommand == "" {
		return nil, fmt.Errorf("FFprobe command not found")
	}

	stat, err := os.Stat(attach.Path)
	if err != nil {
		return nil, err
	}
	if s := stat.Size(); s > maxAudioVideoSize {
		return nil, fmt.Errorf("attachment size %d exceeds maximum of %d", s, maxAudioVideoSize)
	}
	if attach.meta.sampleRate == 0 || attach.meta.duration == 0 {
		return nil, fmt.Errorf("empty sample-rate or duration")
	}

	// Determine the number of audio samples per waveform entry, based on the duration and
	// sample-rate of the original file. Very short files would otherwise yield a frame size of
	// zero, so at least one sample per entry is used.
	numSamples := int(float64(attach.meta.sampleRate)*attach.meta.duration.Seconds()) / maxWaveformSamples
	if numSamples < 1 {
		numSamples = 1
	}

	// Have FFprobe split the audio into fixed-size frames and report the peak level for each.
	// NOTE(review): the attachment path is inserted into the filtergraph description as-is; paths
	// containing characters that are special to FFmpeg filtergraphs presumably need escaping —
	// confirm against how attachment paths are generated.
	args := make([]string, 0, len(ffprobeDefaultArgs)+6)
	args = append(args, ffprobeDefaultArgs...)
	args = append(args,
		"-f", "lavfi",
		"-i", "amovie="+attach.Path+",asetnsamples="+strconv.Itoa(numSamples)+",astats=metadata=1:reset=1",
		"-show_entries", "frame_tags=lavfi.astats.Overall.Peak_level",
	)

	var buf bytes.Buffer
	cmd := exec.Command(ffprobeCommand, args...)
	cmd.Stdout = &buf
	if err := cmd.Run(); err != nil {
		return nil, fmt.Errorf("failed to run command: %w", err)
	}

	// Each output line carries one `key=value` entry with the peak level in decibels; lines that
	// do not parse as a number are skipped, and anything past the maximum number of samples is
	// ignored.
	samples := make([]byte, 0, maxWaveformSamples)
	scanner := bufio.NewScanner(&buf)
	for len(samples) < maxWaveformSamples && scanner.Scan() {
		_, v, _ := bytes.Cut(scanner.Bytes(), []byte{'='})
		db, err := strconv.ParseFloat(string(bytes.TrimSpace(v)), 64)
		if err != nil {
			continue
		}
		samples = append(samples, waveformSampleFromPeak(db))
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("failed scanning command output: %w", err)
	}

	// Extend the result to its full (zero-initialized) capacity, padding short waveforms.
	return samples[:maxWaveformSamples], nil
}

// waveformSampleFromPeak scales a peak level given in decibels to a waveform value ranging from 0
// to 100, clamping values that fall outside of this range (or are not a number), as converting
// these to a byte directly would have implementation-dependent results.
func waveformSampleFromPeak(db float64) byte {
	v := math.Pow(10, db/50) * 100
	switch {
	case math.IsNaN(v) || v <= 0:
		return 0
	case v >= 100:
		return 100
	}
	return byte(v)
}