Skip to content
Snippets Groups Projects
iri_fix_importer.go 1.71 KiB
Newer Older
package core

import (
	"bufio"
	"fmt"
	"io"
	"log"
)

// IriFixImporter is a middleware Importer. Its Import method rewrites the
// incoming RDF stream, escaping a handful of characters ('|', newline, space,
// '^' and '\') wherever they occur between double quotes, and then forwards
// the rewritten stream to another Importer. The intent is to fix IRIs by
// escaping them quite aggressively.
type IriFixImporter struct {
	// Next is the Importer that receives the rewritten stream.
	Next Importer
}

// irifixreader implements io.Reader for use in IriFixImporter. Its
// writeToChan method pushes the converted form of source into ch, and its
// Read method hands those bytes out to the consumer.
type irifixreader struct {
	// source is the Reader the original bytes are read from.
	source io.Reader

	// ch carries the converted bytes from writeToChan to Read. writeToChan
	// closes it once source is exhausted or a read from source fails.
	ch chan byte
}

// Import wraps rdf in an irifixreader, starts the goroutine that fills the
// wrapper's channel with the converted bytes, and hands the wrapper to
// ifi.Next. It returns whatever ifi.Next.Import returns.
func (ifi IriFixImporter) Import(rdf io.Reader) error {
	fixer := new(irifixreader)
	fixer.source = rdf
	// The buffer lets the converting goroutine run up to 1024 bytes ahead of
	// the consumer.
	fixer.ch = make(chan byte, 1024)

	go fixer.writeToChan()

	return ifi.Next.Import(fixer)
}

// Read implements io.Reader. It copies converted bytes from ifr.ch into p.
//
// Read blocks until at least one byte is available (or the channel has been
// closed), then takes whatever further bytes can be received without
// blocking, up to len(p). It returns the number of bytes stored in p, and
// io.EOF only once the channel is closed and fully drained. A zero-length p
// yields (0, nil) without touching the channel.
func (ifr *irifixreader) Read(p []byte) (nbytes int, err error) {
	// A zero-length read must not be reported as end of stream: data may
	// still be pending in the channel.
	if len(p) == 0 {
		return 0, nil
	}

	// Wait for the first byte so that (0, nil) is never returned for a
	// non-empty p.
	first, ok := <-ifr.ch
	if !ok {
		return 0, io.EOF
	}
	p[0] = first
	nbytes = 1

	// Take whatever else is immediately available, but do not wait for p to
	// fill up; a slow source would otherwise stall the consumer even though
	// data is ready to be handed over.
	for nbytes < len(p) {
		select {
		case b, ok := <-ifr.ch:
			if !ok {
				// Closed and drained: hand back what we have. The next call
				// reports io.EOF.
				return nbytes, nil
			}
			p[nbytes] = b
			nbytes++
		default:
			return nbytes, nil
		}
	}

	return nbytes, nil
}

// writeToChan streams ifr.source into ifr.ch, one rune at a time. Runes seen
// while inside a double-quoted span are passed through ifr.fixed first; all
// other runes are forwarded as their UTF-8 encoding. The channel is closed
// once the source is exhausted or a read fails; failures other than io.EOF
// are logged.
func (ifr *irifixreader) writeToChan() {
	input := bufio.NewReader(ifr.source)
	quoted := false

	for {
		c, _, readErr := input.ReadRune()
		if readErr != nil {
			// Closing the channel is how Read learns the stream is over.
			close(ifr.ch)
			if readErr != io.EOF {
				log.Print(readErr)
			}
			return
		}

		// Every double quote flips the state. The flip happens before the
		// rune is emitted, so an opening quote is routed through fixed while
		// a closing quote is not.
		if c == '"' {
			quoted = !quoted
		}

		var out []byte
		switch {
		case quoted:
			out = ifr.fixed(c)
		default:
			out = []byte(string(c))
		}

		// Hand the bytes over to Read, one at a time.
		for i := range out {
			ifr.ch <- out[i]
		}
	}
}

// fixed returns the bytes to emit for rune r. The characters '|', '\n', ' ',
// '^' and '\' are replaced by a percent-escape: "%" followed by the code
// point as two uppercase hexadecimal digits. Every other rune is returned as
// its UTF-8 encoding.
func (ifr *irifixreader) fixed(r rune) []byte {
	switch r {
	case '|', '\n', ' ', '^', '\\':
		// %02X zero-pads to two digits, so '\n' becomes "%0A"; an unpadded
		// "%A" would not be a well-formed percent-escape.
		return []byte(fmt.Sprintf("%%%02X", r))
	}

	return []byte(string(r))
}