about summary refs log tree commit homepage
path: root/ext/mahoro/mahoro.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mahoro/mahoro.c')
-rw-r--r--ext/mahoro/mahoro.c565
1 files changed, 565 insertions, 0 deletions
diff --git a/ext/mahoro/mahoro.c b/ext/mahoro/mahoro.c
new file mode 100644
index 0000000..01d1cf8
--- /dev/null
+++ b/ext/mahoro/mahoro.c
@@ -0,0 +1,565 @@
+/*
+ * This file is Public Domain.
+ *
+ * Note: The current maintainer (Eric Wong) respects and preserves the
+ * original coding style of the original author (Shu-yu Guo) in case
+ * he ever chooses to return to this project.
+ */
+
+#include <ruby.h>
+#include <magic.h>
+#include "nogvl_compat.h"
+
+/*
+ * Fallback accessors for Ruby headers that do not provide them.
+ * Each expands to a fully parenthesized member access so it can be used
+ * inside larger expressions.
+ */
+#ifndef RSTRING_LEN
+#  define RSTRING_LEN(s) (RSTRING(s)->len)
+#endif
+#ifndef RSTRING_PTR
+#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
+#endif
+
+/* Mahoro::Error; created in Init_mahoro and raised when libmagic calls fail */
+static VALUE eMahoroError;
+/* interned "to_path"; set in Init_mahoro and used by mahoro_file */
+static ID id_to_path;
+
+/*
+ * Argument bundle for the nogvl_* callbacks.  NOGVL() hands each callback
+ * a single pointer, so the cookie and its operand travel together.
+ */
+struct nogvl_args {
+        magic_t cookie; /* libmagic handle the callback operates on */
+        union {
+                const char *path; /* operand for magic_load/file/check/compile; may be NULL */
+                int fd; /* not referenced by any function visible here */
+        } as;
+};
+
+/* :nodoc: GC finalizer; closes the wrapped libmagic cookie when one is set */
+static void
+mahoro_free(void *ptr)
+{
+        magic_t cookie = (magic_t)ptr;
+
+        /* the wrapped pointer stays NULL until initialize stores a cookie */
+        if (!cookie)
+                return;
+
+        magic_close(cookie);
+}
+
+/*
+ * :nodoc: allocator registered for Mahoro.  Wraps a NULL cookie with
+ * mahoro_free as the finalizer; Mahoro#initialize stores the real cookie.
+ */
+static VALUE
+mahoro_allocate(VALUE klass)
+{
+        VALUE obj = Data_Wrap_Struct(klass, 0, mahoro_free, 0);
+
+        return obj;
+}
+
+/*
+ * NOGVL() callback around magic_load().
+ * Yields the argument pointer (non-NULL) when magic_load() returns
+ * non-zero, and NULL when it returns zero.
+ */
+static void *
+nogvl_load(void *ptr)
+{
+        struct nogvl_args *a = ptr;
+        int rc = magic_load(a->cookie, a->as.path);
+
+        if (rc)
+                return ptr;
+
+        return NULL;
+}
+
+/*
+ * call-seq:
+ *        Mahoro.new(flags = Mahoro::NONE, path = nil)        ->        mahoro_obj
+ *
+ * Create and initialize a new Mahoro object.
+ * +flags+ may be one or more of any combination of the Mahoro:: constants
+ * supported by Mahoro#flags=.
+ * +path+ (if not nil) is a colon-separated list of database files, see
+ * Mahoro#load.
+ *
+ * If +path+ is not given (or nil), the default database is used.
+ * Consult your libmagic(3) documentation for the location of that file
+ * as it varies by installation.
+ *
+ * Raises Mahoro::Error if the magic cookie cannot be created or the
+ * database cannot be loaded.  Raises TypeError or RangeError if +flags+
+ * cannot be converted to an int.
+ */
+static VALUE
+mahoro_initialize(int argc, VALUE *argv, VALUE self)
+{
+        int flags = MAGIC_NONE;
+        struct nogvl_args args;
+        VALUE vpath, vflags;
+
+        args.as.path = NULL;
+
+        /* all argument conversions happen before the cookie is opened */
+        switch(rb_scan_args(argc, argv, "02", &vflags, &vpath)) {
+                case 2:
+                        if(!NIL_P(vpath)) {
+                                args.as.path = StringValueCStr(vpath);
+                        }
+                        /* fallthrough */
+                case 1:
+                        /*
+                         * NUM2INT type-checks its argument; FIX2INT would
+                         * turn a non-Fixnum into arbitrary flag bits.
+                         */
+                        flags = NUM2INT(vflags);
+                        break;
+        }
+
+        if(!(args.cookie = magic_open(flags))) {
+                rb_raise(eMahoroError, "failed to initialize magic cookie");
+        }
+
+        /*
+         * Give the cookie to self before loading, so mahoro_free closes
+         * it at GC time even when the load below raises.
+         */
+        DATA_PTR(self) = args.cookie;
+
+        if(NOGVL(nogvl_load, &args, RUBY_UBF_IO, NULL)) {
+                rb_raise(eMahoroError, "failed to load database: %s",
+                                      magic_error(args.cookie));
+        }
+
+        return self;
+}
+
+/*
+ * NOGVL() callback around magic_file().
+ * Passes the magic_file() result through unchanged; mahoro_file treats
+ * NULL as a failed lookup.
+ */
+static void *
+nogvl_file(void *ptr)
+{
+        struct nogvl_args *a = ptr;
+        const char *desc = magic_file(a->cookie, a->as.path);
+
+        /* the cast only drops const to match the void * return type */
+        return (void *)desc;
+}
+
+/*
+ * call-seq:
+ *        mahoro_obj.file(filename)        ->        String
+ *
+ * Returns a textual description of the contents of the file named by
+ * +filename+.  Objects responding to #to_path (such as Pathname) are
+ * converted through it first.
+ * Prefer Mahoro#buffer when the contents of the file are already in
+ * memory.
+ * Raises Mahoro::Error on failed lookups.
+ */
+static VALUE
+mahoro_file(VALUE self, VALUE path)
+{
+        struct nogvl_args lookup;
+        const char *desc;
+
+        lookup.cookie = (magic_t)DATA_PTR(self);
+
+        /* Pathname-like objects are converted via #to_path */
+        if (rb_respond_to(path, id_to_path)) {
+                path = rb_funcall(path, id_to_path, 0);
+        }
+
+        lookup.as.path = StringValueCStr(path);
+
+        desc = NOGVL(nogvl_file, &lookup, RUBY_UBF_IO, NULL);
+        if (!desc)
+                rb_raise(eMahoroError, "failed lookup: %s",
+                        magic_error(lookup.cookie));
+
+        return rb_str_new2(desc);
+}
+
+/*
+ * call-seq:
+ *        mahoro_obj.buffer(buffer)        ->        String
+ *
+ * Returns a textual description of the contents of +buffer+, which
+ * should be a String object.
+ * Prefer Mahoro#file when the contents are not already in memory (and
+ * possibly too large to fit into memory).
+ * Raises Mahoro::Error on failed lookups.
+ */
+static VALUE
+mahoro_buffer(VALUE self, VALUE input)
+{
+        magic_t cookie = (magic_t)DATA_PTR(self);
+        const char *desc;
+
+        StringValue(input);
+
+        desc = magic_buffer(cookie, RSTRING_PTR(input), RSTRING_LEN(input));
+        if (desc == NULL)
+                rb_raise(eMahoroError, "failed lookup: %s", magic_error(cookie));
+
+        return rb_str_new2(desc);
+}
+
+/*
+ * call-seq:
+ *        mahoro_obj.flags = flags
+ *
+ * Change the behavior of an already-initialized Mahoro object.  The
+ * behavior of a Mahoro object is specified at load time, but may be
+ * changed after-the-fact.
+ * +flags+ is a bitwise (OR) mask of one or more of the following constants
+ * in the Mahoro namespace:
+ *
+ * - APPLE
+ * - CHECK
+ * - COMPRESS
+ * - CONTINUE
+ * - DEBUG
+ * - DEVICES
+ * - ERROR
+ * - MIME
+ * - MIME_ENCODING
+ * - MIME_TYPE
+ * - NONE
+ * - NO_CHECK_APPTYPE
+ * - NO_CHECK_COMPRESS
+ * - NO_CHECK_ELF
+ * - NO_CHECK_ENCODING
+ * - NO_CHECK_SOFT
+ * - NO_CHECK_TAR
+ * - NO_CHECK_TEXT
+ * - NO_CHECK_TOKENS
+ * - PRESERVE_ATIME
+ * - RAW
+ * - SYMLINK
+ *
+ * Raises TypeError or RangeError if +flags+ cannot be converted to an int.
+ * The C function returns the magic_setflags() result wrapped as a Fixnum.
+ */
+static VALUE
+mahoro_set_flags(VALUE self, VALUE flags)
+{
+        magic_t cookie = (magic_t)DATA_PTR(self);
+        /*
+         * NUM2INT type-checks its argument; FIX2INT would turn a
+         * non-Fixnum into arbitrary flag bits.
+         */
+        int mask = NUM2INT(flags);
+
+        return INT2FIX(magic_setflags(cookie, mask));
+}
+
+/*
+ * NOGVL() callback around magic_check().
+ * Yields the argument pointer (non-NULL) when magic_check() returns
+ * non-zero, and NULL when it returns zero.
+ */
+static void *
+nogvl_check(void *ptr)
+{
+        struct nogvl_args *a = ptr;
+        int rc = magic_check(a->cookie, a->as.path);
+
+        if (rc)
+                return ptr;
+
+        return NULL;
+}
+
+/*
+ * call-seq:
+ *        mahoro_obj.valid?(path = nil)        ->        true or false
+ *
+ * This is used to check the validity of entries in the colon separated
+ * database files passed in as +path+.  If +path+ is not passed (or nil),
+ * this will check the default database.
+ *
+ * Returns true when magic_check() reports no problem, false otherwise.
+ */
+static VALUE
+mahoro_check(int argc, VALUE *argv, VALUE self)
+{
+        struct nogvl_args args;
+        VALUE vpath;
+
+        args.cookie = (magic_t)DATA_PTR(self);
+        args.as.path = NULL;
+
+        /* vpath is nil when the optional argument is omitted */
+        rb_scan_args(argc, argv, "01", &vpath);
+        if(!NIL_P(vpath)) {
+                args.as.path = StringValueCStr(vpath);
+        }
+
+        /* nogvl_check yields NULL when magic_check() returned zero */
+        return NOGVL(nogvl_check, &args, RUBY_UBF_IO, NULL) ? Qfalse : Qtrue;
+}
+
+/*
+ * NOGVL() callback around magic_compile().
+ * Yields the argument pointer (non-NULL) when magic_compile() returns
+ * non-zero, and NULL when it returns zero.
+ */
+static void *
+nogvl_compile(void *ptr)
+{
+        struct nogvl_args *a = ptr;
+        int rc = magic_compile(a->cookie, a->as.path);
+
+        if (rc)
+                return ptr;
+
+        return NULL;
+}
+
+/*
+ * call-seq:
+ *        mahoro_obj.compile(path)        ->        true
+ *
+ * Compile the colon separated list of database files passed in as +path+.
+ * It returns true on success and raises Mahoro::Error on failure.
+ * Compiled files created are named from the +File.basename+ of each file
+ * argument with ".mgc" appended to it.
+ *
+ * There is no need to use this function if you are using the default magic(5)
+ * database on your operating system.  This is only needed if you require
+ * additional magic not in the default magic database.
+ *
+ * Users of this method are likely to need Mahoro#load (and vice-versa).
+ */
+static VALUE
+mahoro_compile(VALUE self, VALUE path)
+{
+        struct nogvl_args job;
+
+        job.cookie = (magic_t)DATA_PTR(self);
+        job.as.path = StringValueCStr(path);
+
+        /* a non-NULL result from nogvl_compile signals failure */
+        if (NOGVL(nogvl_compile, &job, RUBY_UBF_IO, NULL))
+                rb_raise(eMahoroError, "failed compile: %s",
+                        magic_error(job.cookie));
+
+        return Qtrue;
+}
+
+/*
+ * call-seq:
+ *        Mahoro.compile(path)        ->        true
+ *
+ * Class-level wrapper around the Mahoro#compile instance method: it
+ * creates a Mahoro object with no arguments and compiles +path+ with it,
+ * so no existing Mahoro object is required.  Use the instance method
+ * unless you only need to test the validity of a magic(5) database.
+ */
+static VALUE
+mahoro_s_compile(VALUE klass, VALUE path)
+{
+        ID new_id = rb_intern("new");
+        VALUE instance = rb_funcall(klass, new_id, 0, 0);
+
+        return mahoro_compile(instance, path);
+}
+
+/*
+ * call-seq:
+ *        mahoro_obj.load(path)        ->        mahoro_obj
+ *
+ * Used to load the colon-separated list of database files (+path+).
+ * The ".mgc" suffix will be added to each filename where appropriate.
+ * This will raise Mahoro::Error on failure.
+ *
+ * There is no need to use this function if you are using the default magic(5)
+ * database on your operating system.  This is only needed if you require
+ * additional magic not in the default magic database.
+ *
+ * The default database file is named by the MAGIC environment variable.
+ * Consult your libmagic installation documentation for the location of
+ * your default database file name.
+ *
+ * Users of this method are likely to need Mahoro#compile (and vice-versa).
+ */
+static VALUE
+mahoro_load(VALUE self, VALUE path)
+{
+        struct nogvl_args job;
+
+        job.cookie = (magic_t)DATA_PTR(self);
+        job.as.path = StringValueCStr(path);
+
+        /* a non-NULL result from nogvl_load signals failure */
+        if (NOGVL(nogvl_load, &job, RUBY_UBF_IO, NULL))
+                rb_raise(eMahoroError, "failed load: %s",
+                        magic_error(job.cookie));
+
+        return self;
+}
+
+void Init_mahoro(void)
+{
+        VALUE cMahoro;
+        /*
+         * Mahoro is a simple interface to libmagic.
+         *
+         * Common use cases:
+         *
+         *        # initialize a Mahoro object for reading MIME types
+         *        mahoro_obj = Mahoro.new(Mahoro::MIME)
+         *
+         *        # get the MIME type of a file on disk
+         *        # This is ideal for large files which you do not need to
+         *        # read in their entirety.
+         *        mahoro_obj.file('/path/to/file.c') -> 'text/x-c'
+         *
+         *        # get the MIME type of a string buffer
+         *        # This is only ideal if you already have the buffer in
+         *        # memory or intend to process it soon
+         *        str = File.read('/path/to/file.c')
+         *        mahoro_obj.buffer(str) -> 'text/x-c'
+         *
+         *        # switch the Mahoro object to return an ASCII description
+         *        mahoro_obj.flags = Mahoro::NONE
+         *
+         *        # Similar to the Mahoro#file example above, but the
+         *        # Mahoro object now returns a textual description
+         *        mahoro_obj.file('/path/to/file.c') -> 'ASCII C program text'
+         *
+         *        # Similar to the Mahoro#buffer example above, but the
+         *        # Mahoro object now returns a textual description
+         *        str = File.read('/path/to/file.c')
+         *        mahoro_obj.buffer(str) -> 'ASCII C program text'
+         *
+         * Mahoro is not thread-safe by default, see Mahoro::ThreadSafe for
+         * making this module thread-safe.
+         *
+         * More information about libmagic:
+         * https://en.wikipedia.org/wiki/Libmagic
+         *
+         * Source code is available via git:
+         *        git clone git://bogomips.org/mahoro.git
+         *
+         * And viewable with a web browser via cgit:
+         *        http://bogomips.org/mahoro.git
+         *
+         * Eric Wong is the current maintainer of Mahoro.
+         * Plain-text comments, questions, bug reports, patches and
+         * pull requests are all highly welcome on the public mailing list:
+         * mahoro@librelist.org
+         *
+         * You may contact Eric privately at normalperson@yhbt.net
+         *
+         * Please generate patches using the "git format-patch" command.
+         * Use of "git send-email" to send a patch is recommended.
+         * For reviewed patches, you may also send a pull request formatted
+         * using the "git request-pull" command.
+         *
+         * Do not expect Eric to read HTML email under any circumstances.
+         */
+        cMahoro      = rb_define_class("Mahoro", rb_cObject);
+        eMahoroError = rb_define_class_under(cMahoro, "Error", rb_eStandardError);
+
+        /* No special handling, the default */
+        rb_define_const(cMahoro, "NONE", INT2FIX(MAGIC_NONE));
+
+        /* print debugging messages to stderr */
+        rb_define_const(cMahoro, "DEBUG", INT2FIX(MAGIC_DEBUG));
+
+        /* Follow symlinks */
+        rb_define_const(cMahoro, "SYMLINK", INT2FIX(MAGIC_SYMLINK));
+
+        /* Check inside compressed files */
+        rb_define_const(cMahoro, "COMPRESS", INT2FIX(MAGIC_COMPRESS));
+
+        /* Look at the contents of devices */
+        rb_define_const(cMahoro, "DEVICES", INT2FIX(MAGIC_DEVICES));
+
+#ifdef MAGIC_MIME_TYPE
+        /*
+         * Return only the MIME type
+         * This constant may not be defined on older systems.
+         */
+        rb_define_const(cMahoro, "MIME_TYPE", INT2FIX(MAGIC_MIME_TYPE));
+#endif
+
+        /* Return all matches */
+        rb_define_const(cMahoro, "CONTINUE", INT2FIX(MAGIC_CONTINUE));
+
+        /*
+         * Check the magic database for consistency and
+         * print warnings to stderr
+         */
+        rb_define_const(cMahoro, "CHECK", INT2FIX(MAGIC_CHECK));
+
+        /* preserve access time of files analyzed */
+        rb_define_const(cMahoro, "PRESERVE_ATIME",
+                              INT2FIX(MAGIC_PRESERVE_ATIME));
+
+        /*
+         * Don't translate unprintable characters to a \\ooo octal
+         * representation
+         */
+        rb_define_const(cMahoro, "RAW", INT2FIX(MAGIC_RAW));
+
+        /*
+         * Treat operating system errors while trying to open files
+         * and follow symlinks as real errors, instead of printing
+         * them in the magic buffer.
+         */
+        rb_define_const(cMahoro, "ERROR", INT2FIX(MAGIC_ERROR));
+
+#ifdef MAGIC_MIME_ENCODING
+        /*
+         * Return a MIME encoding, instead of a textual description.
+         * This constant may not be defined on older systems.
+         */
+        rb_define_const(cMahoro, "MIME_ENCODING", INT2FIX(MAGIC_MIME_ENCODING));
+#endif
+
+        /* return both MIME type and encoding */
+        rb_define_const(cMahoro, "MIME", INT2FIX(MAGIC_MIME));
+
+#ifdef MAGIC_APPLE
+        /*
+         * Return both Apple creator and type.
+         * This constant may not be defined on older systems.
+         */
+        rb_define_const(cMahoro, "APPLE", INT2FIX(MAGIC_APPLE));
+#endif
+
+#ifdef MAGIC_NO_CHECK_COMPRESS
+        /*
+         * Don't check for or inside compressed files.
+         * This constant may not be defined on older systems.
+         */
+        rb_define_const(cMahoro, "NO_CHECK_COMPRESS",
+                        INT2FIX(MAGIC_NO_CHECK_COMPRESS));
+#endif
+
+#ifdef MAGIC_NO_CHECK_TAR
+        /*
+         * Don't examine tar files.
+         * This constant may not be defined on older systems.
+         */
+        rb_define_const(cMahoro, "NO_CHECK_TAR", INT2FIX(MAGIC_NO_CHECK_TAR));
+#endif
+
+#ifdef MAGIC_NO_CHECK_SOFT
+        /*
+         * Don't consult magic files.
+         * This constant may not be defined on older systems.
+         */
+        rb_define_const(cMahoro, "NO_CHECK_SOFT", INT2FIX(MAGIC_NO_CHECK_SOFT));
+#endif
+
+#ifdef MAGIC_NO_CHECK_APPTYPE
+        /*
+         * Don't check application type (EMX only).
+         * This constant may not be defined on older systems.
+         */
+        rb_define_const(cMahoro, "NO_CHECK_APPTYPE",
+                        INT2FIX(MAGIC_NO_CHECK_APPTYPE));
+#endif
+
+#ifdef MAGIC_NO_CHECK_ELF
+        /*
+         * Don't check for ELF details.
+         * This constant may not be defined on older systems.
+         */
+        rb_define_const(cMahoro, "NO_CHECK_ELF", INT2FIX(MAGIC_NO_CHECK_ELF));
+#endif
+
+#ifdef MAGIC_NO_CHECK_ASCII
+        /*
+         * Don't check for various types of ASCII text files.
+         * The Ruby constant is named NO_CHECK_TEXT but carries the value
+         * of libmagic's MAGIC_NO_CHECK_ASCII.
+         * This constant may not be defined on older systems.
+         */
+        rb_define_const(cMahoro, "NO_CHECK_TEXT",
+                        INT2FIX(MAGIC_NO_CHECK_ASCII));
+#endif
+
+#ifdef MAGIC_NO_CHECK_TOKENS
+        /*
+         * Don't check for known tokens inside ASCII files.
+         * This constant may not be defined on older systems.
+         */
+        rb_define_const(cMahoro, "NO_CHECK_TOKENS",
+                        INT2FIX(MAGIC_NO_CHECK_TOKENS));
+#endif
+
+#ifdef MAGIC_NO_CHECK_ENCODING
+        /*
+         * Don't check for text encodings.
+         * This constant may not be defined on older systems.
+         */
+        rb_define_const(cMahoro, "NO_CHECK_ENCODING",
+                        INT2FIX(MAGIC_NO_CHECK_ENCODING));
+#endif
+
+        /*
+         * Allocator and method table.  An arity of -1 selects the
+         * (argc, argv, self) calling convention; mahoro_check is exposed
+         * to Ruby under the name "valid?".
+         */
+        rb_define_alloc_func(cMahoro, mahoro_allocate);
+        rb_define_method(cMahoro, "initialize", mahoro_initialize, -1);
+        rb_define_method(cMahoro, "file", mahoro_file, 1);
+        rb_define_method(cMahoro, "buffer", mahoro_buffer, 1);
+        rb_define_method(cMahoro, "flags=", mahoro_set_flags, 1);
+        rb_define_method(cMahoro, "valid?", mahoro_check, -1);
+        rb_define_singleton_method(cMahoro, "compile", mahoro_s_compile, 1);
+        rb_define_method(cMahoro, "compile", mahoro_compile, 1);
+        rb_define_method(cMahoro, "load", mahoro_load, 1);
+        /* interned once here; mahoro_file uses it for its #to_path conversion */
+        id_to_path = rb_intern("to_path");
+}
+
+/* arch-tag: mahoro */