From c5fab448d4260594a876a2d29339156e45bfd379 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 5 Apr 2013 22:08:41 +0000 Subject: tree reorganization + various maint fixes Using an ext/ directory is easier to grok for RubyGems --- ext/mahoro/mahoro.c | 565 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 565 insertions(+) create mode 100644 ext/mahoro/mahoro.c (limited to 'ext/mahoro/mahoro.c') diff --git a/ext/mahoro/mahoro.c b/ext/mahoro/mahoro.c new file mode 100644 index 0000000..01d1cf8 --- /dev/null +++ b/ext/mahoro/mahoro.c @@ -0,0 +1,565 @@ +/* + * This file is Public Domain. + * + * Note: The current maintainer (Eric Wong) respects and preserves the + * original coding style of the original author (Shu-yu Guo) in case + * he ever chooses to return to this project. + */ + +#include +#include +#include "nogvl_compat.h" + +#ifndef RSTRING_LEN +# define RSTRING_LEN(s)->len +#endif +#ifndef RSTRING_PTR +# define RSTRING_PTR(s)->ptr +#endif + +static VALUE eMahoroError; +static ID id_to_path; + +struct nogvl_args { + magic_t cookie; + union { + const char *path; + int fd; + } as; +}; + +/* :nodoc: called automatically by GC */ +static void +mahoro_free(ptr) + void *ptr; +{ + if (ptr) + magic_close((magic_t)ptr); +} + +/* :nodoc: called automatically on Mahoro#initialize */ +static VALUE +mahoro_allocate(klass) + VALUE klass; +{ + return Data_Wrap_Struct(klass, 0, mahoro_free, 0); +} + +static void * +nogvl_load(ptr) + void *ptr; +{ + struct nogvl_args *args = ptr; + + return magic_load(args->cookie, args->as.path) ? ptr : NULL; +} + +/* + * call-seq: + * Mahoro.new(flags = Mahoro::NONE, path = nil) -> mahoro_obj + * + * Create and initialize a new Mahoro object. + * +flags+ may be one or more of any combination of the Mahoro:: constants + * supported by Mahoro#flags=. + * +path+ (if not nil) is a colon-separated list of database files, see + * Mahoro#load. + * + * If +path+ is not given (or nil), the default database is used. + * Consult your libmagic(3) documentation for the location of that file + * as it varies by installation. + */ +static VALUE +mahoro_initialize(argc, argv, self) + int argc; + VALUE *argv, self; +{ + int flags = MAGIC_NONE; + struct nogvl_args args; + VALUE vpath, vflags; + + args.as.path = NULL; + + switch(rb_scan_args(argc, argv, "02", &vflags, &vpath)) { + case 2: + if(!NIL_P(vpath)) { + args.as.path = StringValueCStr(vpath); + } + /* fallthrough */ + case 1: + flags = FIX2INT(vflags); + break; + } + + if(!(args.cookie = magic_open(flags))) { + rb_raise(eMahoroError, "failed to initialize magic cookie"); + } + + if(NOGVL(nogvl_load, &args, RUBY_UBF_IO, NULL)) { + rb_raise(eMahoroError, "failed to load database: %s", + magic_error(args.cookie)); + } + + DATA_PTR(self) = args.cookie; + + return self; +} + +static void * +nogvl_file(ptr) + void *ptr; +{ + struct nogvl_args *args = ptr; + + return (void *)magic_file(args->cookie, args->as.path); +} + +/* + * call-seq: + * mahoro_obj.file(filename) -> String + * + * Returns a textual description of the contents of the +filename+ given. + * Use Mahoro#buffer instead of this method if the contents of your + * file is already in memory. + * Raises Mahoro::Error on failed lookups. + */ +static VALUE +mahoro_file(self, path) + VALUE self, path; +{ + const char *msg; + struct nogvl_args args; + + args.cookie = (magic_t)DATA_PTR(self); + + /* Pathname objects may be transformed via #to_path */ + if (rb_respond_to(path, id_to_path)) + path = rb_funcall(path, id_to_path, 0); + + args.as.path = StringValueCStr(path); + + if(!(msg = NOGVL(nogvl_file, &args, RUBY_UBF_IO, NULL))) { + rb_raise(eMahoroError, "failed lookup: %s", + magic_error(args.cookie)); + } + + return rb_str_new2(msg); +} + +/* + * call-seq: + * mahoro_obj.buffer(buffer) -> String + * + * Returns a textual description of the contents of the +buffer+ given. + * +buffer+ should be a String object. + * Use Mahoro#file instead of this method if the contents is not already + * in memory (and possibly too large to fit into memory). + * Raises Mahoro::Error on failed lookups. + */ +static VALUE +mahoro_buffer(self, input) + VALUE self, input; +{ + const char *msg; + magic_t cookie = (magic_t)DATA_PTR(self); + + StringValue(input); + + if(!(msg = magic_buffer(cookie, RSTRING_PTR(input), + RSTRING_LEN(input)))) { + rb_raise(eMahoroError, "failed lookup: %s", magic_error(cookie)); + } + + return rb_str_new2(msg); +} + +/* + * call-seq: + * mahoro_obj.flags = flags + * + * Change the behavior of an already-initialized Mahoro object. The + * behavior of a Mahoro object is specified at load time, but may be + * changed after-the-fact. + * +flags+ is a bitwise (OR) mask of one or more of the following constants + * in the Mahoro namespace: + * + * - APPLE + * - CHECK + * - COMPRESS + * - CONTINUE + * - DEBUG + * - DEVICES + * - ERROR + * - MIME + * - MIME_ENCODING + * - MIME_TYPE + * - NONE + * - NO_CHECK_APPTYPE + * - NO_CHECK_COMPRESS + * - NO_CHECK_ELF + * - NO_CHECK_ENCODING + * - NO_CHECK_SOFT + * - NO_CHECK_TAR + * - NO_CHECK_TEXT + * - NO_CHECK_TOKENS + * - PRESERVE_ATIME + * - RAW + * - SYMLINK + */ +static VALUE +mahoro_set_flags(self, flags) + VALUE self, flags; +{ + magic_t cookie = (magic_t)DATA_PTR(self); + + return INT2FIX(magic_setflags(cookie, FIX2INT(flags))); +} + +static void * +nogvl_check(ptr) + void *ptr; +{ + struct nogvl_args *args = ptr; + + return magic_check(args->cookie, args->as.path) ? ptr : NULL; +} + +/* + * call-seq: + * mahoro_obj.check(path = nil) -> true or false + * + * This is used to check the validity of entries in the colon separated + * database files passed in as +path+. If +path+ is not passed (or nil), + * this will check the default database. + */ +static VALUE +mahoro_check(argc, argv, self) + int argc; + VALUE *argv, self; +{ + struct nogvl_args args; + VALUE vpath; + + args.cookie = (magic_t)DATA_PTR(self); + args.as.path = NULL; + + switch(rb_scan_args(argc, argv, "01", &vpath)) { + case 1: + if(!NIL_P(vpath)) { + args.as.path = StringValueCStr(vpath); + } + break; + } + + if(!NOGVL(nogvl_check, &args, RUBY_UBF_IO, 0)) { + return Qtrue; + } else { + return Qfalse; + } +} + +static void * +nogvl_compile(ptr) + void *ptr; +{ + struct nogvl_args *args = ptr; + + return magic_compile(args->cookie, args->as.path) ? ptr : NULL; +} + +/* + * call-seq: + * mahoro_obj.compile(path) -> true + * + * Compile the the colon separated list of database files passed in as +path+. + * It returns true on success and raises Mahoro::Error on failure. + * Compiled files created are named from the +File.basename+ of each file + * argument with ".mgc" appended to it. + * + * There is no need to use this function if you are using the default magic(5) + * database on your operating system. This is only needed if you require + * additional magic not in the default magic database. + * + * Users of this method are likely to need Mahoro#load (and vice-versa). + */ +static VALUE +mahoro_compile(self, path) + VALUE self, path; +{ + struct nogvl_args args; + + args.cookie = (magic_t)DATA_PTR(self); + args.as.path = StringValueCStr(path); + + if(NOGVL(nogvl_compile, &args, RUBY_UBF_IO, NULL)) { + rb_raise(eMahoroError, "failed compile: %s", + magic_error(args.cookie)); + } + + return Qtrue; +} + +/* + * call-seq: + * Mahoro.compile(path) -> true + * + * This is a wrapper around the Mahoro#compile instance method, but does not + * require an existing Mahoro object. Use the instance method unless you only + * need to test the validity of a magic(5) database. + */ +static VALUE +mahoro_s_compile(klass, path) + VALUE klass, path; +{ + VALUE m = rb_funcall(klass, rb_intern("new"), 0, 0); + + return mahoro_compile(m, path); +} + +/* + * call-seq: + * mahoro_obj.load(path) -> mahoro_obj + * + * Used to load the the colon-separated list of database files (+path+). + * The ".mgc" suffix will be added to each filename where appropriate. + * This will raise Mahoro::Error on failure. + * + * There is no need to use this function if you are using the default magic(5) + * database on your operating system. This is only needed if you require + * additional magic not in the default magic database. + * + * The default database file is named by the MAGIC environment variable. + * Consult your libmagic installation documentation for the location of + * your default database file name. + * + * Users of this method are likely to need Mahoro#compile (and vice-versa). + */ +static VALUE +mahoro_load(self, path) + VALUE self, path; +{ + struct nogvl_args args; + + args.cookie = (magic_t)DATA_PTR(self); + args.as.path = StringValueCStr(path); + + if(NOGVL(nogvl_load, &args, RUBY_UBF_IO, NULL)) { + rb_raise(eMahoroError, "failed load: %s", + magic_error(args.cookie)); + } + + return self; +} + +void Init_mahoro(void) +{ + VALUE cMahoro; + /* + * Mahoro is a simple interface to libmagic. + * + * Common use cases: + * + * # initialize a Mahoro object for reading MIME types + * mahoro_obj = Mahoro.new(Mahoro::MIME) + * + * # get the MIME type of a file on disk + * # This is ideal for large files which you do not need to + * # read in their entirety. + * mahoro_obj.file('/path/to/file.c') -> 'text/x-c' + * + * # get the MIME type of a string buffer + * # This is only ideal if you already have the buffer in + * # memory or intend to process it soon + * str = File.read('/path/to/file.c') + * mahoro_obj.buffer(str) -> 'text/x-c' + * + * # switch the Mahoro object to return an ASCII description + * mahoro_obj.flags = Mahoro::NONE + * + * # Similar to the above example, but the Mahoro object + * # now returns a textual description + * mahoro_obj.file('/path/to/file.c') -> 'ASCII C program text' + * + * # Similar to the above example, but the Mahoro object + * # now returns a textual description + * str = File.read('/path/to/file.c') + * mahoro_obj.buffer(str) -> 'ASCII C program text' + * + * Mahoro is not thread-safe by default, see Mahoro::ThreadSafe for + * making this module thread-safe. + * + * More information about libmagic: + * https://en.wikipedia.org/wiki/Libmagic + * + * Source code is available via git: + * git clone git://bogomips.org/mahoro.git + * + * And viewable with a web browser via cgit: + * http://bogomips.org/mahoro.git + * + * Eric Wong is the current maintainer of Mahoro. + * Plain-text comments, questions, bug reports, patches and + * pull requests are all highly welcome on the public mailing list: + * mahoro@librelist.org + * + * You may contact Eric privately at normalperson@yhbt.net + * + * Please generate patches using the "git format-patch" command. + * Use of "git send-email" to send a patch is recommended. + * For reviewed patches, you may also send a pull request formatted + * using the "git request-pull" command. + * + * Do not expect Eric to read HTML email under any circumstances. + */ + cMahoro = rb_define_class("Mahoro", rb_cObject); + eMahoroError = rb_define_class_under(cMahoro, "Error", rb_eStandardError); + + /* No special handling, the default */ + rb_define_const(cMahoro, "NONE", INT2FIX(MAGIC_NONE)); + + /* print debugging messages to stderr */ + rb_define_const(cMahoro, "DEBUG", INT2FIX(MAGIC_DEBUG)); + + /* Follow symlinks */ + rb_define_const(cMahoro, "SYMLINK", INT2FIX(MAGIC_SYMLINK)); + + /* Check inside compressed files */ + rb_define_const(cMahoro, "COMPRESS", INT2FIX(MAGIC_COMPRESS)); + + /* Look at the contents of devices */ + rb_define_const(cMahoro, "DEVICES", INT2FIX(MAGIC_DEVICES)); + +#ifdef MAGIC_MIME_TYPE + /* + * Return only the MIME type + * This constant may not be defined on older systems. + */ + rb_define_const(cMahoro, "MIME_TYPE", INT2FIX(MAGIC_MIME_TYPE)); +#endif + + /* Return all matches */ + rb_define_const(cMahoro, "CONTINUE", INT2FIX(MAGIC_CONTINUE)); + + /* + * Check the magic database for consistency and + * print warnings to stderr + */ + rb_define_const(cMahoro, "CHECK", INT2FIX(MAGIC_CHECK)); + + /* preserve access time of files analyzed */ + rb_define_const(cMahoro, "PRESERVE_ATIME", + INT2FIX(MAGIC_PRESERVE_ATIME)); + + /* + * Don't translate unprintable characters to a \\ooo octal + * representation + */ + rb_define_const(cMahoro, "RAW", INT2FIX(MAGIC_RAW)); + + /* + * Treat operating system errors while trying to open files + * and follow symlinks as real errors, instead of printing + * them in the magic buffer. + */ + rb_define_const(cMahoro, "ERROR", INT2FIX(MAGIC_ERROR)); + +#ifdef MAGIC_MIME_ENCODING + /* + * Return a MIME encoding, instead of a textual description. + * This constant may not be defined on older systems. + */ + rb_define_const(cMahoro, "MIME_ENCODING", INT2FIX(MAGIC_MIME_ENCODING)); +#endif + + /* return both MIME type and encoding */ + rb_define_const(cMahoro, "MIME", INT2FIX(MAGIC_MIME)); + +#ifdef MAGIC_APPLE + /* + * Return both Apple creator and type. + * This constant may not be defined on older systems. + */ + rb_define_const(cMahoro, "APPLE", INT2FIX(MAGIC_APPLE)); +#endif + +#ifdef MAGIC_NO_CHECK_COMPRESS + /* + * Don't check for or inside compressed files. + * This constant may not be defined on older systems. + */ + rb_define_const(cMahoro, "NO_CHECK_COMPRESS", + INT2FIX(MAGIC_NO_CHECK_COMPRESS)); +#endif + +#ifdef MAGIC_NO_CHECK_TAR + /* + * Don't examine tar files. + * This constant may not be defined on older systems. + */ + rb_define_const(cMahoro, "NO_CHECK_TAR", INT2FIX(MAGIC_NO_CHECK_TAR)); +#endif + +#ifdef MAGIC_NO_CHECK_SOFT + /* + * Don't consult magic files. + * This constant may not be defined on older systems. + */ + rb_define_const(cMahoro, "NO_CHECK_SOFT", INT2FIX(MAGIC_NO_CHECK_SOFT)); +#endif + +#ifdef MAGIC_NO_CHECK_APPTYPE + /* + * Don't check application type (EMX only). + * This constant may not be defined on older systems. + */ + rb_define_const(cMahoro, "NO_CHECK_APPTYPE", + INT2FIX(MAGIC_NO_CHECK_APPTYPE)); +#endif + +#ifdef MAGIC_NO_CHECK_ELF + /* + * Don't check for ELF details. + * This constant may not be defined on older systems. + */ + rb_define_const(cMahoro, "NO_CHECK_ELF", INT2FIX(MAGIC_NO_CHECK_ELF)); +#endif + +#ifdef MAGIC_NO_CHECK_ASCII + /* + * Don't check for various types of ASCII text files. + * This constant may not be defined on older systems. + */ + rb_define_const(cMahoro, "NO_CHECK_TEXT", + INT2FIX(MAGIC_NO_CHECK_ASCII)); +#endif + +#ifdef MAGIC_NO_CHECK_TOKENS + /* + * Don't check for known tokens inside ASCII files. + * This constant may not be defined on older systems. + */ + rb_define_const(cMahoro, "NO_CHECK_TOKENS", + INT2FIX(MAGIC_NO_CHECK_TOKENS)); +#endif + +#ifdef MAGIC_NO_CHECK_ENCODING + /* + * Don't check for text encodings. + * This constant may not be defined on older systems. + */ + rb_define_const(cMahoro, "NO_CHECK_ENCODING", + INT2FIX(MAGIC_NO_CHECK_ENCODING)); +#endif + + rb_define_alloc_func(cMahoro, mahoro_allocate); + rb_define_method(cMahoro, "initialize", mahoro_initialize, -1); + rb_define_method(cMahoro, "file", mahoro_file, 1); + rb_define_method(cMahoro, "buffer", mahoro_buffer, 1); + rb_define_method(cMahoro, "flags=", mahoro_set_flags, 1); + rb_define_method(cMahoro, "valid?", mahoro_check, -1); + rb_define_singleton_method(cMahoro, "compile", mahoro_s_compile, 1); + rb_define_method(cMahoro, "compile", mahoro_compile, 1); + rb_define_method(cMahoro, "load", mahoro_load, 1); + id_to_path = rb_intern("to_path"); +} + +/* arch-tag: mahoro */ -- cgit v1.2.3-24-ge0c7