Why C: details

This section contains details substantiating the discussion in “Why C?”.

I did simple experiments in several languages. In one experiment (cat tests) I just read lines and printed them back out — a line-oriented cat. In another (cut tests) I consumed input lines like x=1,y=2,z=3 one at a time, split them on commas and equals signs to populate hash maps, transformed them (e.g. remove the y field), and emitted them. Basically mlr cut -x -f y with DKVP format. I didn’t do anything fancy — just using each language’s getline, string-split, hashmap-put, etc. (For C, the hashmap logic was my own and has turned into Miller per se.) And nothing was as fast as C, so I used C. Here are the experiments I kept (I failed to keep the Lua code, for example). Note that I re-ran the timings in 2019 using code written in 2015, for purposes of creating this page.

C cat using getdelim

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// ----------------------------------------------------------------
// Copies one input to stdout a line at a time, using getdelim().
//
// file_name: path of the file to copy, or "-" to read standard input.
// Returns 1 on success; 0 if the file cannot be opened or a read error
// occurs (a diagnostic is printed to stderr in both cases).
static int do_stream(char* file_name) {
	FILE* input_stream  = stdin;
	FILE* output_stream = stdout;

	// strcmp() is nonzero for every name other than "-".
	if (strcmp(file_name, "-") != 0) {
		input_stream = fopen(file_name, "r");
		if (input_stream == NULL) {
			perror(file_name);
			return 0;
		}
	}

	// A single buffer is reused for every line; getdelim() grows it on demand.
	char* line = NULL;
	size_t linecap = 0;
	while (getdelim(&line, &linecap, '\n', input_stream) > 0) {
		fputs(line, output_stream);
	}
	// getdelim() can allocate even on the call that fails, so always free.
	free(line);

	// getdelim() returns -1 for both EOF and errors; ferror() tells them apart.
	int ok = 1;
	if (ferror(input_stream)) {
		perror(file_name);
		ok = 0;
	}

	if (input_stream != stdin)
		fclose(input_stream);

	return ok;
}

// ================================================================
// Copies each named file (or stdin when no names are given) to stdout.
// Every file is attempted; the exit status is 0 only if all of them succeeded.
int main(int argc, char** argv) {
	int ok = 1;
	if (argc == 1) {
		ok = do_stream("-");
	} else {
		for (int argi = 1; argi < argc; argi++) {
			// Remember any failure instead of letting a later success overwrite it.
			if (!do_stream(argv[argi])) {
				ok = 0;
			}
		}
	}
	return ok ? 0 : 1;
}

C cat using fgets

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MYBUFSIZ 8192
static char iobuf[MYBUFSIZ];

// ----------------------------------------------------------------
// Copies one input to stdout using fgets() into the fixed-size iobuf.
// Lines longer than the buffer are copied in several pieces, which is
// harmless for a cat since the pieces are written back in order.
//
// file_name: path of the file to copy, or "-" to read standard input.
// Returns 1 on success, 0 if the file cannot be opened.
static int do_stream(char* file_name) {
	FILE* input_stream  = stdin;
	FILE* output_stream = stdout;

	// strcmp() is nonzero for every name other than "-".
	if (strcmp(file_name, "-") != 0) {
		input_stream = fopen(file_name, "r");
		if (input_stream == NULL) {
			perror(file_name);
			return 0;
		}
	}

	while (1) {
		// The limit must be the size of iobuf itself, not the unrelated stdio
		// constant BUFSIZ, which can be larger than MYBUFSIZ on some platforms.
		char* line = fgets(iobuf, (int)sizeof(iobuf), input_stream);
		if (line == NULL)
			break;
		fputs(line, output_stream);
	}
	if (input_stream != stdin)
		fclose(input_stream);

	return 1;
}

// ================================================================
// Copies each named file (or stdin when no names are given) to stdout.
// Every file is attempted; the exit status is 0 only if all of them succeeded.
int main(int argc, char** argv) {
	int ok = 1;
	if (argc == 1) {
		ok = do_stream("-");
	} else {
		for (int argi = 1; argi < argc; argi++) {
			// Remember any failure instead of letting a later success overwrite it.
			if (!do_stream(argv[argi])) {
				ok = 0;
			}
		}
	}
	return ok ? 0 : 1;
}

C cat using mmap

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/mman.h>

// ----------------------------------------------------------------
// Writes the bytes in the half-open range [sol, eol) to output_stream.
// A short write is treated as fatal: a diagnostic is printed and the
// process exits with status 1.
static void emit(char* sol, char* eol, FILE* output_stream) {
	size_t expected = eol - sol;
	if (fwrite(sol, 1, expected, output_stream) == expected) {
		return;
	}
	perror("fwrite");
	exit(1);
}

// ----------------------------------------------------------------
// Copies one file to stdout by mapping it into memory and emitting it a
// line at a time.
//
// file_name: path of a regular file (standard input is not supported here).
// Returns 1 on success. Any system-call failure prints a diagnostic and
// exits the process with status 1.
static int do_stream(char* file_name) {
	FILE* output_stream = stdout;
	int fd = open(file_name, O_RDONLY);
	if (fd < 0) {
		perror("open");
		exit(1);
	}
	struct stat file_info;
	if (fstat(fd, &file_info) < 0) {
		perror("fstat");
		exit(1);
	}
	size_t file_size = (size_t)file_info.st_size;

	// mmap() rejects zero-length mappings, and an empty file has nothing to copy.
	if (file_size > 0) {
		// Nothing below writes through the mapping, so read-only access suffices.
		char* sof = mmap(NULL, file_size, PROT_READ, MAP_FILE|MAP_PRIVATE, fd, (off_t)0);
		if (sof == MAP_FAILED) {
			perror("mmap");
			exit(1);
		}
		char* eof = sof + file_size;
		char* sol = sof;

		for (char* p = sof; p < eof; p++) {
			if (*p == '\n') {
				// Emit through the newline so line terminators are preserved.
				emit(sol, p + 1, output_stream);
				sol = p + 1;
			}
		}
		// A final line with no terminating newline must still be copied.
		if (sol < eof) {
			emit(sol, eof, output_stream);
		}

		if (munmap(sof, file_size) < 0) {
			perror("munmap");
			exit(1);
		}
	}

	if (close(fd) < 0) {
		perror("close");
		exit(1);
	}

	return 1;
}

// ================================================================
// Copies each named file to stdout. Every file is attempted; the exit
// status is 0 only if all of them succeeded.
int main(int argc, char** argv) {
	int ok = 1;
	for (int argi = 1; argi < argc; argi++) {
		// Remember any failure instead of letting a later success overwrite it.
		if (!do_stream(argv[argi])) {
			ok = 0;
		}
	}
	return ok ? 0 : 1;
}

D cat

// Reads $(D stdin) and writes it to $(D stdout).
import std.stdio;

// Line-oriented cat: echoes standard input to standard output.
void main()
{
	for (;;)
	{
		// readln() keeps the line terminator and yields null at end of input.
		string text = stdin.readln();
		if (text is null)
			break;
		write(text);
	}
}

Rust cat

use std::io;
use std::io::BufRead;

/// Line-oriented cat: copies standard input to standard output.
///
/// Panics if a line cannot be read (for example, on invalid UTF-8).
fn main() {
    let stdin = io::stdin();
    for line in stdin.lock().lines() {
        // `lines()` strips the line terminator, so `println!` must put it
        // back; `print!` would run every input line together.
        println!("{}", line.unwrap());
    }
}

Go cat

package main

import (
	"bufio"
	"io"
	"log"
	"os"
)

// ----------------------------------------------------------------
// main runs handle over every file named on the command line, or over
// standard input ("-") when none are given, and exits with status 1 if any
// of them failed.
func main() {
	includeFields := []string{"a", "x"}

	fileNames := os.Args[1:]
	if len(fileNames) == 0 {
		fileNames = []string{"-"}
	}

	exitCode := 0
	for _, fileName := range fileNames {
		// Every input is processed even after an earlier failure.
		if !handle(fileName, includeFields) {
			exitCode = 1
		}
	}
	os.Exit(exitCode)
}

// ----------------------------------------------------------------
func handle(fileName string, includeFields []string) (ok bool) {
	inputStream := os.Stdin
	if fileName != "-" {
		var err error
		if inputStream, err = os.Open(fileName); err != nil {
			log.Println(err)
			return false
		}
	}

	reader := bufio.NewReader(inputStream)
	writer := bufio.NewWriter(os.Stdout)
	eof := false

	for !eof {
		line, err := reader.ReadString('\n')
		if err == io.EOF {
			err = nil
			eof = true
		} else if err != nil {
			log.Println(err)
			if fileName != "-" {
				inputStream.Close()
			}
			return false
		} else {
			writer.WriteString(line)
		}
	}
	if fileName != "-" {
		inputStream.Close()
	}
	writer.Flush()

	return true
}

Nim cat

# Line-oriented cat: readLine strips the terminator and echo adds it back.
var text = ""
while stdin.readLine(text):
  echo text

D cut

// Reads $(D stdin) and writes it to $(D stdout).
// http://dlang.org/hash-map.html
import std.stdio;
import std.string;
import std.array;

// Reads DKVP records such as "a=1,b=2,x=3" from standard input, keeps only
// the fields listed in includeFields, and writes one output record per
// input line.
void main() {
	string[] includeFields = ["a", "x"];
	string line;
	while ((line = stdin.readln()) !is null) {
		// Input string to hashmap.
		// readln() keeps the line terminator; chomp() removes it so it does
		// not end up inside the last field's value.
		string[string] oldmap;
		foreach (field; split(chomp(line), ',')) {
			// Split on the first '=' only, so values may themselves contain '='.
			auto eq = indexOf(field, '=');
			if (eq < 0)
				continue; // no '=' means no key-value pair; skip rather than crash
			oldmap[field[0 .. eq]] = field[eq + 1 .. $];
		}

		// Hashmap-to-hashmap transform.
		// Note: unordered hashmap here.
		string[string] newmap;
		foreach (includeField; includeFields) {
			if (auto value = includeField in oldmap) {
				newmap[includeField] = *value;
			}
		}

		// Hashmap to output strings.
		int i = 0;
		foreach (key, value; newmap) {
			if (i > 0)
				write(',');
			write(key);
			write('=');
			write(value);
			i++;
		}
		write('\n');
	}
}

Go cut

package main

import (
	"bufio"
	"io"
	"log"
	"os"
	"strings"
)

// ----------------------------------------------------------------
// main runs handle over every file named on the command line, or over
// standard input ("-") when none are given, and exits with status 1 if any
// of them failed.
func main() {
	includeFields := []string{"a", "x"}

	fileNames := os.Args[1:]
	if len(fileNames) == 0 {
		fileNames = []string{"-"}
	}

	exitCode := 0
	for _, fileName := range fileNames {
		// Every input is processed even after an earlier failure.
		if !handle(fileName, includeFields) {
			exitCode = 1
		}
	}
	os.Exit(exitCode)
}

// ----------------------------------------------------------------
func handle(fileName string, includeFields []string) (ok bool) {
	inputStream := os.Stdin
	if fileName != "-" {
		var err error
		if inputStream, err = os.Open(fileName); err != nil {
			log.Println(err)
			return false
		}
	}

	reader := bufio.NewReader(inputStream)
	writer := bufio.NewWriter(os.Stdout)
	eof := false

	for !eof {
		line, err := reader.ReadString('\n')
		if err == io.EOF {
			err = nil
			eof = true
		} else if err != nil {
			log.Println(err)
			if fileName != "-" {
				inputStream.Close()
			}
			return false
		} else {

			// 0.030s

			// Line to map
			mymap := make(map[string]string)
			fields := strings.Split(line, ",");
			for _, field := range(fields) {
				kvps := strings.SplitN(field, "=", 2)
				mymap[kvps[0]] = kvps[1]
			}
			// 0.220s
			// delta 0.190s
			// 27%

			// Map-to-map transform
			newmap := make(map[string]string)
			for _, includeField := range(includeFields) {
				value, present := mymap[includeField]
				if present {
					newmap[includeField] = value
				}
			}
			// 0.280s
			// delta 0.060s
			// 9%

			// Map to string
			outs := make([]string, len(newmap))
			i := 0
			for k, v := range(newmap) {
				outs[i] = k + "=" + v
				i++
			}
			// 0.320s
			// delta 0.040s
			// 6%

			out := strings.Join(outs, ",")
			// 0.330s
			// delta 0.010s
			// 2%

			// Write to output stream
			//fmt.Println("")
			writer.WriteString(out)
			// delta 0.390s
			// 56%

		}
	}
	if fileName != "-" {
		inputStream.Close()
	}
	writer.Flush()

	return true
}

Nim cut

import strutils, tables

# Reads DKVP records such as "a=1,b=2,x=3" from standard input, keeps only
# the fields listed in includeFields, and echoes one record per input line.
const includeFields = ["a", "x"]

for line in stdin.lines:
  # Line to ordered map. Start from an empty table rather than seeding it
  # with a dummy ""->"" entry.
  var record = initOrderedTable[string, string]()
  for word in line.split(','):
    # Split on the first '=' only, so values may themselves contain '='.
    let pair = word.split('=', maxsplit = 1)
    if pair.len == 2:
      record[pair[0]] = pair[1]
    # A word without '=' is not a key-value pair and is skipped.

  # Map-to-map transform: keep only the requested fields.
  var selected = initOrderedTable[string, string]()
  for name in includeFields:
    if record.hasKey(name):
      selected[name] = record[name]

  # Map to output string.
  var outs: seq[string] = @[]
  for key, value in selected.pairs:
    outs.add(key & "=" & value)
  echo outs.join(",")

Overview

Timings

Discussion

Source code