(* not any more so simple automated downloader for free mp3-files offered on www.xlr8r.com * compile as: * $ ocamlfind ocamlc -package netclient,str,unix -linkpkg -o xlr8r xlr8r.ml *) let root_url = "http://www.xlr8r.com" let cookie = "XLR8RUID=99999999999999999" let filter_name = let space_re = Str.regexp "%20" in let name_re = Str.regexp "^http://www\\.xlr8r.com/downloads/download\\.php\\?id=[0-9]+&file=\\(.*\\)\\.mp3$" in function link -> if not (Str.string_match name_re link 0) then ( Printf.printf "<%s>\n%!" link ; failwith "filter_name" ) else let raw = Str.matched_group 1 link in Str.global_replace space_re " " raw let make_url = let re = Str.regexp " " in function link -> root_url ^ Str.global_replace re "%20" link let rec is_downloader = function Nethtml.Data "DOWNLOAD MP3" :: _ -> true | _ :: tail -> is_downloader tail | [] -> false let rec find_mp3 = function Nethtml.Element (tag,attrs,subs) -> let sub_mp3s = List.flatten (List.map find_mp3 subs) in if String.lowercase tag = "a" && is_downloader subs then ( try List.assoc "href" attrs :: sub_mp3s with Not_found -> sub_mp3s ) else sub_mp3s | Nethtml.Data _ -> [] let download folder urls cookie verbose parallels = let total = ref 0 in let calls = List.map (new Http_client.get) urls in let filename tmp base = let suff = if tmp then ".tmp" else "" in Printf.sprintf "%s/%s.mp3%s" folder base suff in let cb (call : Http_client.http_call) : unit = try let name = filter_name call#request_uri in if call # status = `Successful then ( let cnt_type = fst (call # response_header # content_type ()) in if cnt_type = "audio/mpeg" then let _ = Unix.system (Printf.sprintf "mv '%s' '%s'" (filename true name) (filename false name)) in Printf.printf "%s finished.\n%!" name else Printf.printf "%s provides no audio/mpeg but %s (check it):\n%s\n%!" name cnt_type call # response_body # value ; ) else Printf.printf "Problems with %s, take a look.\n%!" name with Failure "filter_name" -> Printf.printf "Unknown request to <%s>.\n%!" call#request_uri in let handler event_system event_queue = function Unixqueue.Input_arrived (_, fd) -> total := !total + in_channel_length (Unix.in_channel_of_descr fd) ; Printf.printf "%d bytes arrived.\n%!" !total | _ -> Printf.printf "something happend, not critical.\n%!" in let pipeline = new Http_client.pipeline in List.iter (fun (call : Http_client.get) -> let name = filter_name call#request_uri in let header = call # request_header `Base in header#update_field "Cookie" cookie ; call # set_response_body_storage (`File (fun () -> filename true name)) ; pipeline # add_with_callback call cb) calls ; let event_group = pipeline # event_system # new_group () in pipeline # set_options { pipeline # get_options with Http_client.verbose_connection = verbose ; Http_client.number_of_parallel_connections = parallels } ; pipeline # event_system # add_handler event_group handler ; pipeline # run () let weekly_folder root page = let time = Unix.localtime (Unix.time ()) in let week = (time.Unix.tm_yday + 7) / 7 in let year = 1900 + time.Unix.tm_year in let page_suff = match page with None -> "" | Some n -> "-" ^ string_of_int n in Printf.sprintf "%s/xlr8r%d-%d%s" root year week page_suff let parse_args () = let page = ref None in let target_folder = ref "." in let verbose = ref false in let debug = ref false in let parallels = ref 2 in Arg.parse [ "-n", Arg.Int (fun n -> page := Some n), "Download songs from archive page [n]" ; "-d", Arg.String (fun s -> target_folder := s), "Directory to save songs to (default: \".\")." ; "-v", Arg.Unit (fun _ -> verbose := true), "Be verbose." ; "-b", Arg.Unit (fun _ -> debug := true), "Debug mode." ; "-p", Arg.Int (fun n -> parallels := n), "Maximum number of parallel connections" ] (fun s -> print_endline ("dont know what to do with " ^ s ^ ".")) "Downloads some songs from www.xlr8r.com" ; !page, !target_folder, !verbose, !debug, !parallels let get_html url = Printf.printf "Retrieving website ... %!" ; let content = Http_client.Convenience.http_get url in let lexbuf = Lexing.from_string content in let html = Nethtml.parse_document lexbuf in Printf.printf "done.\n%!" ; html let page, target_folder, verbose, debug, parallels = parse_args () let folder, mp3s = if debug then "mukke", [ "http://www.xlr8r.com/downloads/download.php?id=1986&file=Amir%20Sulaiman%20-%20I%20Love%20You.mp3" ] else let site_url = match page with None -> root_url ^ "/downloads/" | Some n -> root_url ^ "/downloads/?page=" ^ string_of_int n in let html = get_html site_url in let mp3s = let links = List.rev (List.flatten (List.map find_mp3 html)) in List.map make_url links in let folder = weekly_folder target_folder page in folder, mp3s let _ = try Unix.mkdir folder 0o744 with Unix.Unix_error (Unix.EEXIST, "mkdir", _) -> () let _ = Unix.chown folder 1000 1000 let _ = download folder mp3s "XLR8RUID=13821181912903424" verbose parallels